/*	$NetBSD: vfs_syscalls.c,v 1.471 2013/11/27 17:24:44 christos Exp $	*/

/*-
 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.471 2013/11/27 17:24:44 christos Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 static int change_flags(struct vnode *, u_long, struct lwp *); 120 static int change_mode(struct vnode *, int, struct lwp *l); 121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122 static int do_open(lwp_t *, struct vnode *, struct pathbuf *, int, int, int *); 123 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 124 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 125 enum uio_seg); 126 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 127 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 128 enum uio_seg); 129 static int 
do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 130 enum uio_seg, int); 131 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 132 size_t, register_t *); 133 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 134 135 static int fd_nameiat(struct lwp *, int, struct nameidata *); 136 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 137 namei_simple_flags_t, struct vnode **); 138 139 140 /* 141 * This table is used to maintain compatibility with 4.3BSD 142 * and NetBSD 0.9 mount syscalls - and possibly other systems. 143 * Note, the order is important! 144 * 145 * Do not modify this table. It should only contain filesystems 146 * supported by NetBSD 0.9 and 4.3BSD. 147 */ 148 const char * const mountcompatnames[] = { 149 NULL, /* 0 = MOUNT_NONE */ 150 MOUNT_FFS, /* 1 = MOUNT_UFS */ 151 MOUNT_NFS, /* 2 */ 152 MOUNT_MFS, /* 3 */ 153 MOUNT_MSDOS, /* 4 */ 154 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 155 MOUNT_FDESC, /* 6 */ 156 MOUNT_KERNFS, /* 7 */ 157 NULL, /* 8 = MOUNT_DEVFS */ 158 MOUNT_AFS, /* 9 */ 159 }; 160 161 const int nmountcompatnames = __arraycount(mountcompatnames); 162 163 static int 164 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 165 { 166 file_t *dfp; 167 int error; 168 169 if (fdat != AT_FDCWD) { 170 if ((error = fd_getvnode(fdat, &dfp)) != 0) 171 goto out; 172 173 NDAT(ndp, dfp->f_data); 174 } 175 176 error = namei(ndp); 177 178 if (fdat != AT_FDCWD) 179 fd_putfile(fdat); 180 out: 181 return error; 182 } 183 184 static int 185 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 186 namei_simple_flags_t sflags, struct vnode **vp_ret) 187 { 188 file_t *dfp; 189 struct vnode *dvp; 190 int error; 191 192 if (fdat != AT_FDCWD) { 193 if ((error = fd_getvnode(fdat, &dfp)) != 0) 194 goto out; 195 196 dvp = dfp->f_data; 197 } else { 198 dvp = NULL; 199 } 200 201 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 202 203 if (fdat != AT_FDCWD) 204 
fd_putfile(fdat); 205 out: 206 return error; 207 } 208 209 static int 210 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 211 { 212 int error; 213 214 fp->f_flag = flags & FMASK; 215 fp->f_type = DTYPE_VNODE; 216 fp->f_ops = &vnops; 217 fp->f_data = vp; 218 219 if (flags & (O_EXLOCK | O_SHLOCK)) { 220 struct flock lf; 221 int type; 222 223 lf.l_whence = SEEK_SET; 224 lf.l_start = 0; 225 lf.l_len = 0; 226 if (flags & O_EXLOCK) 227 lf.l_type = F_WRLCK; 228 else 229 lf.l_type = F_RDLCK; 230 type = F_FLOCK; 231 if ((flags & FNONBLOCK) == 0) 232 type |= F_WAIT; 233 VOP_UNLOCK(vp); 234 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 235 if (error) { 236 (void) vn_close(vp, fp->f_flag, fp->f_cred); 237 fd_abort(l->l_proc, fp, indx); 238 return error; 239 } 240 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 241 atomic_or_uint(&fp->f_flag, FHASLOCK); 242 } 243 if (flags & O_CLOEXEC) 244 fd_set_exclose(l, indx, true); 245 return 0; 246 } 247 248 static int 249 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 250 void *data, size_t *data_len) 251 { 252 struct mount *mp; 253 int error = 0, saved_flags; 254 255 mp = vp->v_mount; 256 saved_flags = mp->mnt_flag; 257 258 /* We can operate only on VV_ROOT nodes. */ 259 if ((vp->v_vflag & VV_ROOT) == 0) { 260 error = EINVAL; 261 goto out; 262 } 263 264 /* 265 * We only allow the filesystem to be reloaded if it 266 * is currently mounted read-only. Additionally, we 267 * prevent read-write to read-only downgrades. 
268 */ 269 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 270 (mp->mnt_flag & MNT_RDONLY) == 0 && 271 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 272 error = EOPNOTSUPP; /* Needs translation */ 273 goto out; 274 } 275 276 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 277 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 278 if (error) 279 goto out; 280 281 if (vfs_busy(mp, NULL)) { 282 error = EPERM; 283 goto out; 284 } 285 286 mutex_enter(&mp->mnt_updating); 287 288 mp->mnt_flag &= ~MNT_OP_FLAGS; 289 mp->mnt_flag |= flags & MNT_OP_FLAGS; 290 291 /* 292 * Set the mount level flags. 293 */ 294 if (flags & MNT_RDONLY) 295 mp->mnt_flag |= MNT_RDONLY; 296 else if (mp->mnt_flag & MNT_RDONLY) 297 mp->mnt_iflag |= IMNT_WANTRDWR; 298 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 299 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 300 error = VFS_MOUNT(mp, path, data, data_len); 301 302 if (error && data != NULL) { 303 int error2; 304 305 /* 306 * Update failed; let's try and see if it was an 307 * export request. For compat with 3.0 and earlier. 308 */ 309 error2 = vfs_hooks_reexport(mp, path, data); 310 311 /* 312 * Only update error code if the export request was 313 * understood but some problem occurred while 314 * processing it. 
315 */ 316 if (error2 != EJUSTRETURN) 317 error = error2; 318 } 319 320 if (mp->mnt_iflag & IMNT_WANTRDWR) 321 mp->mnt_flag &= ~MNT_RDONLY; 322 if (error) 323 mp->mnt_flag = saved_flags; 324 mp->mnt_flag &= ~MNT_OP_FLAGS; 325 mp->mnt_iflag &= ~IMNT_WANTRDWR; 326 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 327 if (mp->mnt_syncer == NULL) 328 error = vfs_allocate_syncvnode(mp); 329 } else { 330 if (mp->mnt_syncer != NULL) 331 vfs_deallocate_syncvnode(mp); 332 } 333 mutex_exit(&mp->mnt_updating); 334 vfs_unbusy(mp, false, NULL); 335 336 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 337 (flags & MNT_EXTATTR)) { 338 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 339 NULL, 0, NULL) != 0) { 340 printf("%s: failed to start extattr, error = %d", 341 mp->mnt_stat.f_mntonname, error); 342 mp->mnt_flag &= ~MNT_EXTATTR; 343 } 344 } 345 346 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 347 !(flags & MNT_EXTATTR)) { 348 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 349 NULL, 0, NULL) != 0) { 350 printf("%s: failed to stop extattr, error = %d", 351 mp->mnt_stat.f_mntonname, error); 352 mp->mnt_flag |= MNT_RDONLY; 353 } 354 } 355 out: 356 return (error); 357 } 358 359 static int 360 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 361 { 362 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 363 int error; 364 365 /* Copy file-system type from userspace. */ 366 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 367 if (error) { 368 /* 369 * Historically, filesystem types were identified by numbers. 370 * If we get an integer for the filesystem type instead of a 371 * string, we check to see if it matches one of the historic 372 * filesystem types. 373 */ 374 u_long fsindex = (u_long)fstype; 375 if (fsindex >= nmountcompatnames || 376 mountcompatnames[fsindex] == NULL) 377 return ENODEV; 378 strlcpy(fstypename, mountcompatnames[fsindex], 379 sizeof(fstypename)); 380 } 381 382 /* Accept `ufs' as an alias for `ffs', for compatibility. 
*/ 383 if (strcmp(fstypename, "ufs") == 0) 384 fstypename[0] = 'f'; 385 386 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 387 return 0; 388 389 /* If we can autoload a vfs module, try again */ 390 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 391 392 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 393 return 0; 394 395 return ENODEV; 396 } 397 398 static int 399 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 400 void *data, size_t *data_len) 401 { 402 struct mount *mp; 403 int error; 404 405 /* If MNT_GETARGS is specified, it should be the only flag. */ 406 if (flags & ~MNT_GETARGS) 407 return EINVAL; 408 409 mp = vp->v_mount; 410 411 /* XXX: probably some notion of "can see" here if we want isolation. */ 412 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 413 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 414 if (error) 415 return error; 416 417 if ((vp->v_vflag & VV_ROOT) == 0) 418 return EINVAL; 419 420 if (vfs_busy(mp, NULL)) 421 return EPERM; 422 423 mutex_enter(&mp->mnt_updating); 424 mp->mnt_flag &= ~MNT_OP_FLAGS; 425 mp->mnt_flag |= MNT_GETARGS; 426 error = VFS_MOUNT(mp, path, data, data_len); 427 mp->mnt_flag &= ~MNT_OP_FLAGS; 428 mutex_exit(&mp->mnt_updating); 429 430 vfs_unbusy(mp, false, NULL); 431 return (error); 432 } 433 434 int 435 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 436 { 437 /* { 438 syscallarg(const char *) type; 439 syscallarg(const char *) path; 440 syscallarg(int) flags; 441 syscallarg(void *) data; 442 syscallarg(size_t) data_len; 443 } */ 444 445 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 446 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 447 SCARG(uap, data_len), retval); 448 } 449 450 int 451 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 452 const char *path, int flags, void *data, enum uio_seg data_seg, 453 size_t data_len, register_t *retval) 454 { 455 struct vnode *vp; 456 
void *data_buf = data; 457 bool vfsopsrele = false; 458 int error; 459 460 /* XXX: The calling convention of this routine is totally bizarre */ 461 if (vfsops) 462 vfsopsrele = true; 463 464 /* 465 * Get vnode to be covered 466 */ 467 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 468 if (error != 0) { 469 vp = NULL; 470 goto done; 471 } 472 473 if (vfsops == NULL) { 474 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 475 vfsops = vp->v_mount->mnt_op; 476 } else { 477 /* 'type' is userspace */ 478 error = mount_get_vfsops(type, &vfsops); 479 if (error != 0) 480 goto done; 481 vfsopsrele = true; 482 } 483 } 484 485 if (data != NULL && data_seg == UIO_USERSPACE) { 486 if (data_len == 0) { 487 /* No length supplied, use default for filesystem */ 488 data_len = vfsops->vfs_min_mount_data; 489 if (data_len > VFS_MAX_MOUNT_DATA) { 490 error = EINVAL; 491 goto done; 492 } 493 /* 494 * Hopefully a longer buffer won't make copyin() fail. 495 * For compatibility with 3.0 and earlier. 496 */ 497 if (flags & MNT_UPDATE 498 && data_len < sizeof (struct mnt_export_args30)) 499 data_len = sizeof (struct mnt_export_args30); 500 } 501 data_buf = kmem_alloc(data_len, KM_SLEEP); 502 503 /* NFS needs the buffer even for mnt_getargs .... */ 504 error = copyin(data, data_buf, data_len); 505 if (error != 0) 506 goto done; 507 } 508 509 if (flags & MNT_GETARGS) { 510 if (data_len == 0) { 511 error = EINVAL; 512 goto done; 513 } 514 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 515 if (error != 0) 516 goto done; 517 if (data_seg == UIO_USERSPACE) 518 error = copyout(data_buf, data, data_len); 519 *retval = data_len; 520 } else if (flags & MNT_UPDATE) { 521 error = mount_update(l, vp, path, flags, data_buf, &data_len); 522 } else { 523 /* Locking is handled internally in mount_domount(). 
*/ 524 KASSERT(vfsopsrele == true); 525 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 526 &data_len); 527 vfsopsrele = false; 528 } 529 530 done: 531 if (vfsopsrele) 532 vfs_delref(vfsops); 533 if (vp != NULL) { 534 vrele(vp); 535 } 536 if (data_buf != data) 537 kmem_free(data_buf, data_len); 538 return (error); 539 } 540 541 /* 542 * Unmount a file system. 543 * 544 * Note: unmount takes a path to the vnode mounted on as argument, 545 * not special file (as before). 546 */ 547 /* ARGSUSED */ 548 int 549 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 550 { 551 /* { 552 syscallarg(const char *) path; 553 syscallarg(int) flags; 554 } */ 555 struct vnode *vp; 556 struct mount *mp; 557 int error; 558 struct pathbuf *pb; 559 struct nameidata nd; 560 561 error = pathbuf_copyin(SCARG(uap, path), &pb); 562 if (error) { 563 return error; 564 } 565 566 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 567 if ((error = namei(&nd)) != 0) { 568 pathbuf_destroy(pb); 569 return error; 570 } 571 vp = nd.ni_vp; 572 pathbuf_destroy(pb); 573 574 mp = vp->v_mount; 575 atomic_inc_uint(&mp->mnt_refcnt); 576 VOP_UNLOCK(vp); 577 578 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 579 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 580 if (error) { 581 vrele(vp); 582 vfs_destroy(mp); 583 return (error); 584 } 585 586 /* 587 * Don't allow unmounting the root file system. 588 */ 589 if (mp->mnt_flag & MNT_ROOTFS) { 590 vrele(vp); 591 vfs_destroy(mp); 592 return (EINVAL); 593 } 594 595 /* 596 * Must be the root of the filesystem 597 */ 598 if ((vp->v_vflag & VV_ROOT) == 0) { 599 vrele(vp); 600 vfs_destroy(mp); 601 return (EINVAL); 602 } 603 604 vrele(vp); 605 error = dounmount(mp, SCARG(uap, flags), l); 606 vfs_destroy(mp); 607 return error; 608 } 609 610 /* 611 * Sync each mounted filesystem. 
612 */ 613 #ifdef DEBUG 614 int syncprt = 0; 615 struct ctldebug debug0 = { "syncprt", &syncprt }; 616 #endif 617 618 void 619 do_sys_sync(struct lwp *l) 620 { 621 struct mount *mp, *nmp; 622 int asyncflag; 623 624 mutex_enter(&mountlist_lock); 625 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 626 if (vfs_busy(mp, &nmp)) { 627 continue; 628 } 629 mutex_enter(&mp->mnt_updating); 630 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 631 asyncflag = mp->mnt_flag & MNT_ASYNC; 632 mp->mnt_flag &= ~MNT_ASYNC; 633 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 634 if (asyncflag) 635 mp->mnt_flag |= MNT_ASYNC; 636 } 637 mutex_exit(&mp->mnt_updating); 638 vfs_unbusy(mp, false, &nmp); 639 } 640 mutex_exit(&mountlist_lock); 641 #ifdef DEBUG 642 if (syncprt) 643 vfs_bufstats(); 644 #endif /* DEBUG */ 645 } 646 647 /* ARGSUSED */ 648 int 649 sys_sync(struct lwp *l, const void *v, register_t *retval) 650 { 651 do_sys_sync(l); 652 return (0); 653 } 654 655 656 /* 657 * Access or change filesystem quotas. 658 * 659 * (this is really 14 different calls bundled into one) 660 */ 661 662 static int 663 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 664 { 665 struct quotastat info_k; 666 int error; 667 668 /* ensure any padding bytes are cleared */ 669 memset(&info_k, 0, sizeof(info_k)); 670 671 error = vfs_quotactl_stat(mp, &info_k); 672 if (error) { 673 return error; 674 } 675 676 return copyout(&info_k, info_u, sizeof(info_k)); 677 } 678 679 static int 680 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 681 struct quotaidtypestat *info_u) 682 { 683 struct quotaidtypestat info_k; 684 int error; 685 686 /* ensure any padding bytes are cleared */ 687 memset(&info_k, 0, sizeof(info_k)); 688 689 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 690 if (error) { 691 return error; 692 } 693 694 return copyout(&info_k, info_u, sizeof(info_k)); 695 } 696 697 static int 698 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 699 struct quotaobjtypestat *info_u) 
700 { 701 struct quotaobjtypestat info_k; 702 int error; 703 704 /* ensure any padding bytes are cleared */ 705 memset(&info_k, 0, sizeof(info_k)); 706 707 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 708 if (error) { 709 return error; 710 } 711 712 return copyout(&info_k, info_u, sizeof(info_k)); 713 } 714 715 static int 716 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 717 struct quotaval *val_u) 718 { 719 struct quotakey key_k; 720 struct quotaval val_k; 721 int error; 722 723 /* ensure any padding bytes are cleared */ 724 memset(&val_k, 0, sizeof(val_k)); 725 726 error = copyin(key_u, &key_k, sizeof(key_k)); 727 if (error) { 728 return error; 729 } 730 731 error = vfs_quotactl_get(mp, &key_k, &val_k); 732 if (error) { 733 return error; 734 } 735 736 return copyout(&val_k, val_u, sizeof(val_k)); 737 } 738 739 static int 740 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 741 const struct quotaval *val_u) 742 { 743 struct quotakey key_k; 744 struct quotaval val_k; 745 int error; 746 747 error = copyin(key_u, &key_k, sizeof(key_k)); 748 if (error) { 749 return error; 750 } 751 752 error = copyin(val_u, &val_k, sizeof(val_k)); 753 if (error) { 754 return error; 755 } 756 757 return vfs_quotactl_put(mp, &key_k, &val_k); 758 } 759 760 static int 761 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u) 762 { 763 struct quotakey key_k; 764 int error; 765 766 error = copyin(key_u, &key_k, sizeof(key_k)); 767 if (error) { 768 return error; 769 } 770 771 return vfs_quotactl_delete(mp, &key_k); 772 } 773 774 static int 775 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 776 { 777 struct quotakcursor cursor_k; 778 int error; 779 780 /* ensure any padding bytes are cleared */ 781 memset(&cursor_k, 0, sizeof(cursor_k)); 782 783 error = vfs_quotactl_cursoropen(mp, &cursor_k); 784 if (error) { 785 return error; 786 } 787 788 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 
789 } 790 791 static int 792 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 793 { 794 struct quotakcursor cursor_k; 795 int error; 796 797 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 798 if (error) { 799 return error; 800 } 801 802 return vfs_quotactl_cursorclose(mp, &cursor_k); 803 } 804 805 static int 806 do_sys_quotactl_cursorskipidtype(struct mount *mp, 807 struct quotakcursor *cursor_u, int idtype) 808 { 809 struct quotakcursor cursor_k; 810 int error; 811 812 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 813 if (error) { 814 return error; 815 } 816 817 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 818 if (error) { 819 return error; 820 } 821 822 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 823 } 824 825 static int 826 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 827 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 828 unsigned *ret_u) 829 { 830 #define CGET_STACK_MAX 8 831 struct quotakcursor cursor_k; 832 struct quotakey stackkeys[CGET_STACK_MAX]; 833 struct quotaval stackvals[CGET_STACK_MAX]; 834 struct quotakey *keys_k; 835 struct quotaval *vals_k; 836 unsigned ret_k; 837 int error; 838 839 if (maxnum > 128) { 840 maxnum = 128; 841 } 842 843 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 844 if (error) { 845 return error; 846 } 847 848 if (maxnum <= CGET_STACK_MAX) { 849 keys_k = stackkeys; 850 vals_k = stackvals; 851 /* ensure any padding bytes are cleared */ 852 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 853 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 854 } else { 855 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 856 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 857 } 858 859 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 860 &ret_k); 861 if (error) { 862 goto fail; 863 } 864 865 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 866 if (error) { 867 goto 
fail; 868 } 869 870 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 871 if (error) { 872 goto fail; 873 } 874 875 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 876 if (error) { 877 goto fail; 878 } 879 880 /* do last to maximize the chance of being able to recover a failure */ 881 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 882 883 fail: 884 if (keys_k != stackkeys) { 885 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 886 } 887 if (vals_k != stackvals) { 888 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 889 } 890 return error; 891 } 892 893 static int 894 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 895 int *ret_u) 896 { 897 struct quotakcursor cursor_k; 898 int ret_k; 899 int error; 900 901 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 902 if (error) { 903 return error; 904 } 905 906 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 907 if (error) { 908 return error; 909 } 910 911 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 912 if (error) { 913 return error; 914 } 915 916 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 917 } 918 919 static int 920 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 921 { 922 struct quotakcursor cursor_k; 923 int error; 924 925 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 926 if (error) { 927 return error; 928 } 929 930 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 931 if (error) { 932 return error; 933 } 934 935 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 936 } 937 938 static int 939 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 940 { 941 char *path_k; 942 int error; 943 944 /* XXX this should probably be a struct pathbuf */ 945 path_k = PNBUF_GET(); 946 error = copyin(path_u, path_k, PATH_MAX); 947 if (error) { 948 PNBUF_PUT(path_k); 949 return error; 950 } 951 952 error = vfs_quotactl_quotaon(mp, idtype, path_k); 953 954 PNBUF_PUT(path_k); 955 return error; 956 } 957 
958 static int 959 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 960 { 961 return vfs_quotactl_quotaoff(mp, idtype); 962 } 963 964 int 965 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 966 { 967 struct mount *mp; 968 struct vnode *vp; 969 int error; 970 971 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 972 if (error != 0) 973 return (error); 974 mp = vp->v_mount; 975 976 switch (args->qc_op) { 977 case QUOTACTL_STAT: 978 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 979 break; 980 case QUOTACTL_IDTYPESTAT: 981 error = do_sys_quotactl_idtypestat(mp, 982 args->u.idtypestat.qc_idtype, 983 args->u.idtypestat.qc_info); 984 break; 985 case QUOTACTL_OBJTYPESTAT: 986 error = do_sys_quotactl_objtypestat(mp, 987 args->u.objtypestat.qc_objtype, 988 args->u.objtypestat.qc_info); 989 break; 990 case QUOTACTL_GET: 991 error = do_sys_quotactl_get(mp, 992 args->u.get.qc_key, 993 args->u.get.qc_val); 994 break; 995 case QUOTACTL_PUT: 996 error = do_sys_quotactl_put(mp, 997 args->u.put.qc_key, 998 args->u.put.qc_val); 999 break; 1000 case QUOTACTL_DELETE: 1001 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key); 1002 break; 1003 case QUOTACTL_CURSOROPEN: 1004 error = do_sys_quotactl_cursoropen(mp, 1005 args->u.cursoropen.qc_cursor); 1006 break; 1007 case QUOTACTL_CURSORCLOSE: 1008 error = do_sys_quotactl_cursorclose(mp, 1009 args->u.cursorclose.qc_cursor); 1010 break; 1011 case QUOTACTL_CURSORSKIPIDTYPE: 1012 error = do_sys_quotactl_cursorskipidtype(mp, 1013 args->u.cursorskipidtype.qc_cursor, 1014 args->u.cursorskipidtype.qc_idtype); 1015 break; 1016 case QUOTACTL_CURSORGET: 1017 error = do_sys_quotactl_cursorget(mp, 1018 args->u.cursorget.qc_cursor, 1019 args->u.cursorget.qc_keys, 1020 args->u.cursorget.qc_vals, 1021 args->u.cursorget.qc_maxnum, 1022 args->u.cursorget.qc_ret); 1023 break; 1024 case QUOTACTL_CURSORATEND: 1025 error = do_sys_quotactl_cursoratend(mp, 1026 args->u.cursoratend.qc_cursor, 1027 
args->u.cursoratend.qc_ret); 1028 break; 1029 case QUOTACTL_CURSORREWIND: 1030 error = do_sys_quotactl_cursorrewind(mp, 1031 args->u.cursorrewind.qc_cursor); 1032 break; 1033 case QUOTACTL_QUOTAON: 1034 error = do_sys_quotactl_quotaon(mp, 1035 args->u.quotaon.qc_idtype, 1036 args->u.quotaon.qc_quotafile); 1037 break; 1038 case QUOTACTL_QUOTAOFF: 1039 error = do_sys_quotactl_quotaoff(mp, 1040 args->u.quotaoff.qc_idtype); 1041 break; 1042 default: 1043 error = EINVAL; 1044 break; 1045 } 1046 1047 vrele(vp); 1048 return error; 1049 } 1050 1051 /* ARGSUSED */ 1052 int 1053 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1054 register_t *retval) 1055 { 1056 /* { 1057 syscallarg(const char *) path; 1058 syscallarg(struct quotactl_args *) args; 1059 } */ 1060 struct quotactl_args args; 1061 int error; 1062 1063 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1064 if (error) { 1065 return error; 1066 } 1067 1068 return do_sys_quotactl(SCARG(uap, path), &args); 1069 } 1070 1071 int 1072 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1073 int root) 1074 { 1075 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1076 int error = 0; 1077 1078 /* 1079 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1080 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1081 * overrides MNT_NOWAIT. 
1082 */ 1083 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1084 (flags != MNT_WAIT && flags != 0)) { 1085 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1086 goto done; 1087 } 1088 1089 /* Get the filesystem stats now */ 1090 memset(sp, 0, sizeof(*sp)); 1091 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1092 return error; 1093 } 1094 1095 if (cwdi->cwdi_rdir == NULL) 1096 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1097 done: 1098 if (cwdi->cwdi_rdir != NULL) { 1099 size_t len; 1100 char *bp; 1101 char c; 1102 char *path = PNBUF_GET(); 1103 1104 bp = path + MAXPATHLEN; 1105 *--bp = '\0'; 1106 rw_enter(&cwdi->cwdi_lock, RW_READER); 1107 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1108 MAXPATHLEN / 2, 0, l); 1109 rw_exit(&cwdi->cwdi_lock); 1110 if (error) { 1111 PNBUF_PUT(path); 1112 return error; 1113 } 1114 len = strlen(bp); 1115 if (len != 1) { 1116 /* 1117 * for mount points that are below our root, we can see 1118 * them, so we fix up the pathname and return them. The 1119 * rest we cannot see, so we don't allow viewing the 1120 * data. 1121 */ 1122 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1123 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1124 (void)strlcpy(sp->f_mntonname, 1125 c == '\0' ? "/" : &sp->f_mntonname[len], 1126 sizeof(sp->f_mntonname)); 1127 } else { 1128 if (root) 1129 (void)strlcpy(sp->f_mntonname, "/", 1130 sizeof(sp->f_mntonname)); 1131 else 1132 error = EPERM; 1133 } 1134 } 1135 PNBUF_PUT(path); 1136 } 1137 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1138 return error; 1139 } 1140 1141 /* 1142 * Get filesystem statistics by path. 
1143 */ 1144 int 1145 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1146 { 1147 struct mount *mp; 1148 int error; 1149 struct vnode *vp; 1150 1151 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1152 if (error != 0) 1153 return error; 1154 mp = vp->v_mount; 1155 error = dostatvfs(mp, sb, l, flags, 1); 1156 vrele(vp); 1157 return error; 1158 } 1159 1160 /* ARGSUSED */ 1161 int 1162 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1163 { 1164 /* { 1165 syscallarg(const char *) path; 1166 syscallarg(struct statvfs *) buf; 1167 syscallarg(int) flags; 1168 } */ 1169 struct statvfs *sb; 1170 int error; 1171 1172 sb = STATVFSBUF_GET(); 1173 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1174 if (error == 0) 1175 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1176 STATVFSBUF_PUT(sb); 1177 return error; 1178 } 1179 1180 /* 1181 * Get filesystem statistics by fd. 1182 */ 1183 int 1184 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1185 { 1186 file_t *fp; 1187 struct mount *mp; 1188 int error; 1189 1190 /* fd_getvnode() will use the descriptor for us */ 1191 if ((error = fd_getvnode(fd, &fp)) != 0) 1192 return (error); 1193 mp = ((struct vnode *)fp->f_data)->v_mount; 1194 error = dostatvfs(mp, sb, curlwp, flags, 1); 1195 fd_putfile(fd); 1196 return error; 1197 } 1198 1199 /* ARGSUSED */ 1200 int 1201 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1202 { 1203 /* { 1204 syscallarg(int) fd; 1205 syscallarg(struct statvfs *) buf; 1206 syscallarg(int) flags; 1207 } */ 1208 struct statvfs *sb; 1209 int error; 1210 1211 sb = STATVFSBUF_GET(); 1212 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1213 if (error == 0) 1214 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1215 STATVFSBUF_PUT(sb); 1216 return error; 1217 } 1218 1219 1220 /* 1221 * Get statistics on all filesystems. 
1222 */ 1223 int 1224 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1225 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1226 register_t *retval) 1227 { 1228 int root = 0; 1229 struct proc *p = l->l_proc; 1230 struct mount *mp, *nmp; 1231 struct statvfs *sb; 1232 size_t count, maxcount; 1233 int error = 0; 1234 1235 sb = STATVFSBUF_GET(); 1236 maxcount = bufsize / entry_sz; 1237 mutex_enter(&mountlist_lock); 1238 count = 0; 1239 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1240 if (vfs_busy(mp, &nmp)) { 1241 continue; 1242 } 1243 if (sfsp && count < maxcount) { 1244 error = dostatvfs(mp, sb, l, flags, 0); 1245 if (error) { 1246 vfs_unbusy(mp, false, &nmp); 1247 error = 0; 1248 continue; 1249 } 1250 error = copyfn(sb, sfsp, entry_sz); 1251 if (error) { 1252 vfs_unbusy(mp, false, NULL); 1253 goto out; 1254 } 1255 sfsp = (char *)sfsp + entry_sz; 1256 root |= strcmp(sb->f_mntonname, "/") == 0; 1257 } 1258 count++; 1259 vfs_unbusy(mp, false, &nmp); 1260 } 1261 mutex_exit(&mountlist_lock); 1262 1263 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1264 /* 1265 * fake a root entry 1266 */ 1267 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1268 sb, l, flags, 1); 1269 if (error != 0) 1270 goto out; 1271 if (sfsp) { 1272 error = copyfn(sb, sfsp, entry_sz); 1273 if (error != 0) 1274 goto out; 1275 } 1276 count++; 1277 } 1278 if (sfsp && count > maxcount) 1279 *retval = maxcount; 1280 else 1281 *retval = count; 1282 out: 1283 STATVFSBUF_PUT(sb); 1284 return error; 1285 } 1286 1287 int 1288 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1289 { 1290 /* { 1291 syscallarg(struct statvfs *) buf; 1292 syscallarg(size_t) bufsize; 1293 syscallarg(int) flags; 1294 } */ 1295 1296 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1297 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1298 } 1299 1300 /* 1301 * Change current working directory to a given file descriptor. 
1302 */ 1303 /* ARGSUSED */ 1304 int 1305 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1306 { 1307 /* { 1308 syscallarg(int) fd; 1309 } */ 1310 struct proc *p = l->l_proc; 1311 struct cwdinfo *cwdi; 1312 struct vnode *vp, *tdp; 1313 struct mount *mp; 1314 file_t *fp; 1315 int error, fd; 1316 1317 /* fd_getvnode() will use the descriptor for us */ 1318 fd = SCARG(uap, fd); 1319 if ((error = fd_getvnode(fd, &fp)) != 0) 1320 return (error); 1321 vp = fp->f_data; 1322 1323 vref(vp); 1324 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1325 if (vp->v_type != VDIR) 1326 error = ENOTDIR; 1327 else 1328 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1329 if (error) { 1330 vput(vp); 1331 goto out; 1332 } 1333 while ((mp = vp->v_mountedhere) != NULL) { 1334 error = vfs_busy(mp, NULL); 1335 vput(vp); 1336 if (error != 0) 1337 goto out; 1338 error = VFS_ROOT(mp, &tdp); 1339 vfs_unbusy(mp, false, NULL); 1340 if (error) 1341 goto out; 1342 vp = tdp; 1343 } 1344 VOP_UNLOCK(vp); 1345 1346 /* 1347 * Disallow changing to a directory not under the process's 1348 * current root directory (if there is one). 1349 */ 1350 cwdi = p->p_cwdi; 1351 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1352 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1353 vrele(vp); 1354 error = EPERM; /* operation not permitted */ 1355 } else { 1356 vrele(cwdi->cwdi_cdir); 1357 cwdi->cwdi_cdir = vp; 1358 } 1359 rw_exit(&cwdi->cwdi_lock); 1360 1361 out: 1362 fd_putfile(fd); 1363 return (error); 1364 } 1365 1366 /* 1367 * Change this process's notion of the root directory to a given file 1368 * descriptor. 
1369 */ 1370 int 1371 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1372 { 1373 struct proc *p = l->l_proc; 1374 struct vnode *vp; 1375 file_t *fp; 1376 int error, fd = SCARG(uap, fd); 1377 1378 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1379 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1380 return error; 1381 /* fd_getvnode() will use the descriptor for us */ 1382 if ((error = fd_getvnode(fd, &fp)) != 0) 1383 return error; 1384 vp = fp->f_data; 1385 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1386 if (vp->v_type != VDIR) 1387 error = ENOTDIR; 1388 else 1389 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1390 VOP_UNLOCK(vp); 1391 if (error) 1392 goto out; 1393 vref(vp); 1394 1395 change_root(p->p_cwdi, vp, l); 1396 1397 out: 1398 fd_putfile(fd); 1399 return (error); 1400 } 1401 1402 /* 1403 * Change current working directory (``.''). 1404 */ 1405 /* ARGSUSED */ 1406 int 1407 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1408 { 1409 /* { 1410 syscallarg(const char *) path; 1411 } */ 1412 struct proc *p = l->l_proc; 1413 struct cwdinfo *cwdi; 1414 int error; 1415 struct vnode *vp; 1416 1417 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1418 &vp, l)) != 0) 1419 return (error); 1420 cwdi = p->p_cwdi; 1421 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1422 vrele(cwdi->cwdi_cdir); 1423 cwdi->cwdi_cdir = vp; 1424 rw_exit(&cwdi->cwdi_lock); 1425 return (0); 1426 } 1427 1428 /* 1429 * Change notion of root (``/'') directory. 
1430 */ 1431 /* ARGSUSED */ 1432 int 1433 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1434 { 1435 /* { 1436 syscallarg(const char *) path; 1437 } */ 1438 struct proc *p = l->l_proc; 1439 int error; 1440 struct vnode *vp; 1441 1442 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1443 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1444 return (error); 1445 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1446 &vp, l)) != 0) 1447 return (error); 1448 1449 change_root(p->p_cwdi, vp, l); 1450 1451 return (0); 1452 } 1453 1454 /* 1455 * Common routine for chroot and fchroot. 1456 * NB: callers need to properly authorize the change root operation. 1457 */ 1458 void 1459 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1460 { 1461 struct proc *p = l->l_proc; 1462 kauth_cred_t ncred; 1463 1464 ncred = kauth_cred_alloc(); 1465 1466 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1467 if (cwdi->cwdi_rdir != NULL) 1468 vrele(cwdi->cwdi_rdir); 1469 cwdi->cwdi_rdir = vp; 1470 1471 /* 1472 * Prevent escaping from chroot by putting the root under 1473 * the working directory. Silently chdir to / if we aren't 1474 * already there. 1475 */ 1476 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1477 /* 1478 * XXX would be more failsafe to change directory to a 1479 * deadfs node here instead 1480 */ 1481 vrele(cwdi->cwdi_cdir); 1482 vref(vp); 1483 cwdi->cwdi_cdir = vp; 1484 } 1485 rw_exit(&cwdi->cwdi_lock); 1486 1487 /* Get a write lock on the process credential. */ 1488 proc_crmod_enter(); 1489 1490 kauth_cred_clone(p->p_cred, ncred); 1491 kauth_proc_chroot(ncred, p->p_cwdi); 1492 1493 /* Broadcast our credentials to the process and other LWPs. */ 1494 proc_crmod_leave(ncred, p->p_cred, true); 1495 } 1496 1497 /* 1498 * Common routine for chroot and chdir. 
1499 * XXX "where" should be enum uio_seg 1500 */ 1501 int 1502 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1503 { 1504 struct pathbuf *pb; 1505 struct nameidata nd; 1506 int error; 1507 1508 error = pathbuf_maybe_copyin(path, where, &pb); 1509 if (error) { 1510 return error; 1511 } 1512 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1513 if ((error = namei(&nd)) != 0) { 1514 pathbuf_destroy(pb); 1515 return error; 1516 } 1517 *vpp = nd.ni_vp; 1518 pathbuf_destroy(pb); 1519 1520 if ((*vpp)->v_type != VDIR) 1521 error = ENOTDIR; 1522 else 1523 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1524 1525 if (error) 1526 vput(*vpp); 1527 else 1528 VOP_UNLOCK(*vpp); 1529 return (error); 1530 } 1531 1532 /* 1533 * Internals of sys_open - path has already been converted into a pathbuf 1534 * (so we can easily reuse this function from other parts of the kernel, 1535 * like posix_spawn post-processing). 1536 */ 1537 static int 1538 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1539 int open_mode, int *fd) 1540 { 1541 struct proc *p = l->l_proc; 1542 struct cwdinfo *cwdi = p->p_cwdi; 1543 file_t *fp; 1544 struct vnode *vp; 1545 int flags, cmode; 1546 int indx, error; 1547 struct nameidata nd; 1548 1549 if (open_flags & O_SEARCH) { 1550 open_flags &= ~(int)O_SEARCH; 1551 } 1552 1553 flags = FFLAGS(open_flags); 1554 if ((flags & (FREAD | FWRITE)) == 0) 1555 return EINVAL; 1556 1557 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1558 return error; 1559 } 1560 1561 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1562 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1563 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1564 if (dvp != NULL) 1565 NDAT(&nd, dvp); 1566 1567 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1568 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1569 fd_abort(p, fp, indx); 1570 if ((error == EDUPFD || error == EMOVEFD) && 1571 l->l_dupfd >= 0 && /* XXX from fdopen */ 1572 (error = 1573 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1574 *fd = indx; 1575 return 0; 1576 } 1577 if (error == ERESTART) 1578 error = EINTR; 1579 return error; 1580 } 1581 1582 l->l_dupfd = 0; 1583 vp = nd.ni_vp; 1584 1585 if ((error = open_setfp(l, fp, vp, indx, flags))) 1586 return error; 1587 1588 VOP_UNLOCK(vp); 1589 *fd = indx; 1590 fd_affix(p, fp, indx); 1591 return 0; 1592 } 1593 1594 int 1595 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1596 { 1597 struct pathbuf *pb; 1598 int error, oflags; 1599 1600 oflags = FFLAGS(open_flags); 1601 if ((oflags & (FREAD | FWRITE)) == 0) 1602 return EINVAL; 1603 1604 pb = pathbuf_create(path); 1605 if (pb == NULL) 1606 return ENOMEM; 1607 1608 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1609 pathbuf_destroy(pb); 1610 1611 return error; 1612 } 1613 1614 /* 1615 * Check permissions, allocate an open file structure, 1616 * and call the device open routine if any. 
1617 */ 1618 static int 1619 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1620 int mode, int *fd) 1621 { 1622 file_t *dfp = NULL; 1623 struct vnode *dvp = NULL; 1624 struct pathbuf *pb; 1625 int error; 1626 1627 error = pathbuf_copyin(path, &pb); 1628 if (error) 1629 return error; 1630 1631 if (fdat != AT_FDCWD) { 1632 /* fd_getvnode() will use the descriptor for us */ 1633 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1634 goto out; 1635 1636 dvp = dfp->f_data; 1637 } 1638 1639 error = do_open(l, dvp, pb, flags, mode, fd); 1640 1641 if (dfp != NULL) 1642 fd_putfile(fdat); 1643 out: 1644 pathbuf_destroy(pb); 1645 return error; 1646 } 1647 1648 int 1649 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1650 { 1651 /* { 1652 syscallarg(const char *) path; 1653 syscallarg(int) flags; 1654 syscallarg(int) mode; 1655 } */ 1656 int error; 1657 int fd; 1658 1659 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1660 SCARG(uap, flags), SCARG(uap, mode), &fd); 1661 1662 if (error == 0) 1663 *retval = fd; 1664 1665 return error; 1666 } 1667 1668 int 1669 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1670 { 1671 /* { 1672 syscallarg(int) fd; 1673 syscallarg(const char *) path; 1674 syscallarg(int) oflags; 1675 syscallarg(int) mode; 1676 } */ 1677 int error; 1678 int fd; 1679 1680 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1681 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1682 1683 if (error == 0) 1684 *retval = fd; 1685 1686 return error; 1687 } 1688 1689 static void 1690 vfs__fhfree(fhandle_t *fhp) 1691 { 1692 size_t fhsize; 1693 1694 if (fhp == NULL) { 1695 return; 1696 } 1697 fhsize = FHANDLE_SIZE(fhp); 1698 kmem_free(fhp, fhsize); 1699 } 1700 1701 /* 1702 * vfs_composefh: compose a filehandle. 
1703 */ 1704 1705 int 1706 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1707 { 1708 struct mount *mp; 1709 struct fid *fidp; 1710 int error; 1711 size_t needfhsize; 1712 size_t fidsize; 1713 1714 mp = vp->v_mount; 1715 fidp = NULL; 1716 if (*fh_size < FHANDLE_SIZE_MIN) { 1717 fidsize = 0; 1718 } else { 1719 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1720 if (fhp != NULL) { 1721 memset(fhp, 0, *fh_size); 1722 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1723 fidp = &fhp->fh_fid; 1724 } 1725 } 1726 error = VFS_VPTOFH(vp, fidp, &fidsize); 1727 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1728 if (error == 0 && *fh_size < needfhsize) { 1729 error = E2BIG; 1730 } 1731 *fh_size = needfhsize; 1732 return error; 1733 } 1734 1735 int 1736 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1737 { 1738 struct mount *mp; 1739 fhandle_t *fhp; 1740 size_t fhsize; 1741 size_t fidsize; 1742 int error; 1743 1744 *fhpp = NULL; 1745 mp = vp->v_mount; 1746 fidsize = 0; 1747 error = VFS_VPTOFH(vp, NULL, &fidsize); 1748 KASSERT(error != 0); 1749 if (error != E2BIG) { 1750 goto out; 1751 } 1752 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1753 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1754 if (fhp == NULL) { 1755 error = ENOMEM; 1756 goto out; 1757 } 1758 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1759 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1760 if (error == 0) { 1761 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1762 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1763 *fhpp = fhp; 1764 } else { 1765 kmem_free(fhp, fhsize); 1766 } 1767 out: 1768 return error; 1769 } 1770 1771 void 1772 vfs_composefh_free(fhandle_t *fhp) 1773 { 1774 1775 vfs__fhfree(fhp); 1776 } 1777 1778 /* 1779 * vfs_fhtovp: lookup a vnode by a filehandle. 
1780 */ 1781 1782 int 1783 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1784 { 1785 struct mount *mp; 1786 int error; 1787 1788 *vpp = NULL; 1789 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1790 if (mp == NULL) { 1791 error = ESTALE; 1792 goto out; 1793 } 1794 if (mp->mnt_op->vfs_fhtovp == NULL) { 1795 error = EOPNOTSUPP; 1796 goto out; 1797 } 1798 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1799 out: 1800 return error; 1801 } 1802 1803 /* 1804 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1805 * the needed size. 1806 */ 1807 1808 int 1809 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1810 { 1811 fhandle_t *fhp; 1812 int error; 1813 1814 *fhpp = NULL; 1815 if (fhsize > FHANDLE_SIZE_MAX) { 1816 return EINVAL; 1817 } 1818 if (fhsize < FHANDLE_SIZE_MIN) { 1819 return EINVAL; 1820 } 1821 again: 1822 fhp = kmem_alloc(fhsize, KM_SLEEP); 1823 if (fhp == NULL) { 1824 return ENOMEM; 1825 } 1826 error = copyin(ufhp, fhp, fhsize); 1827 if (error == 0) { 1828 /* XXX this check shouldn't be here */ 1829 if (FHANDLE_SIZE(fhp) == fhsize) { 1830 *fhpp = fhp; 1831 return 0; 1832 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1833 /* 1834 * a kludge for nfsv2 padded handles. 1835 */ 1836 size_t sz; 1837 1838 sz = FHANDLE_SIZE(fhp); 1839 kmem_free(fhp, fhsize); 1840 fhsize = sz; 1841 goto again; 1842 } else { 1843 /* 1844 * userland told us wrong size. 
1845 */ 1846 error = EINVAL; 1847 } 1848 } 1849 kmem_free(fhp, fhsize); 1850 return error; 1851 } 1852 1853 void 1854 vfs_copyinfh_free(fhandle_t *fhp) 1855 { 1856 1857 vfs__fhfree(fhp); 1858 } 1859 1860 /* 1861 * Get file handle system call 1862 */ 1863 int 1864 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1865 { 1866 /* { 1867 syscallarg(char *) fname; 1868 syscallarg(fhandle_t *) fhp; 1869 syscallarg(size_t *) fh_size; 1870 } */ 1871 struct vnode *vp; 1872 fhandle_t *fh; 1873 int error; 1874 struct pathbuf *pb; 1875 struct nameidata nd; 1876 size_t sz; 1877 size_t usz; 1878 1879 /* 1880 * Must be super user 1881 */ 1882 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1883 0, NULL, NULL, NULL); 1884 if (error) 1885 return (error); 1886 1887 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1888 if (error) { 1889 return error; 1890 } 1891 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1892 error = namei(&nd); 1893 if (error) { 1894 pathbuf_destroy(pb); 1895 return error; 1896 } 1897 vp = nd.ni_vp; 1898 pathbuf_destroy(pb); 1899 1900 error = vfs_composefh_alloc(vp, &fh); 1901 vput(vp); 1902 if (error != 0) { 1903 goto out; 1904 } 1905 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1906 if (error != 0) { 1907 goto out; 1908 } 1909 sz = FHANDLE_SIZE(fh); 1910 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1911 if (error != 0) { 1912 goto out; 1913 } 1914 if (usz >= sz) { 1915 error = copyout(fh, SCARG(uap, fhp), sz); 1916 } else { 1917 error = E2BIG; 1918 } 1919 out: 1920 vfs_composefh_free(fh); 1921 return (error); 1922 } 1923 1924 /* 1925 * Open a file given a file handle. 1926 * 1927 * Check permissions, allocate an open file structure, 1928 * and call the device open routine if any. 
1929 */ 1930 1931 int 1932 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1933 register_t *retval) 1934 { 1935 file_t *fp; 1936 struct vnode *vp = NULL; 1937 kauth_cred_t cred = l->l_cred; 1938 file_t *nfp; 1939 int indx, error = 0; 1940 struct vattr va; 1941 fhandle_t *fh; 1942 int flags; 1943 proc_t *p; 1944 1945 p = curproc; 1946 1947 /* 1948 * Must be super user 1949 */ 1950 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1951 0, NULL, NULL, NULL))) 1952 return (error); 1953 1954 if (oflags & O_SEARCH) { 1955 oflags &= ~(int)O_SEARCH; 1956 } 1957 1958 flags = FFLAGS(oflags); 1959 if ((flags & (FREAD | FWRITE)) == 0) 1960 return (EINVAL); 1961 if ((flags & O_CREAT)) 1962 return (EINVAL); 1963 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1964 return (error); 1965 fp = nfp; 1966 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1967 if (error != 0) { 1968 goto bad; 1969 } 1970 error = vfs_fhtovp(fh, &vp); 1971 if (error != 0) { 1972 goto bad; 1973 } 1974 1975 /* Now do an effective vn_open */ 1976 1977 if (vp->v_type == VSOCK) { 1978 error = EOPNOTSUPP; 1979 goto bad; 1980 } 1981 error = vn_openchk(vp, cred, flags); 1982 if (error != 0) 1983 goto bad; 1984 if (flags & O_TRUNC) { 1985 VOP_UNLOCK(vp); /* XXX */ 1986 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1987 vattr_null(&va); 1988 va.va_size = 0; 1989 error = VOP_SETATTR(vp, &va, cred); 1990 if (error) 1991 goto bad; 1992 } 1993 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1994 goto bad; 1995 if (flags & FWRITE) { 1996 mutex_enter(vp->v_interlock); 1997 vp->v_writecount++; 1998 mutex_exit(vp->v_interlock); 1999 } 2000 2001 /* done with modified vn_open, now finish what sys_open does. 
*/ 2002 if ((error = open_setfp(l, fp, vp, indx, flags))) 2003 return error; 2004 2005 VOP_UNLOCK(vp); 2006 *retval = indx; 2007 fd_affix(p, fp, indx); 2008 vfs_copyinfh_free(fh); 2009 return (0); 2010 2011 bad: 2012 fd_abort(p, fp, indx); 2013 if (vp != NULL) 2014 vput(vp); 2015 vfs_copyinfh_free(fh); 2016 return (error); 2017 } 2018 2019 int 2020 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2021 { 2022 /* { 2023 syscallarg(const void *) fhp; 2024 syscallarg(size_t) fh_size; 2025 syscallarg(int) flags; 2026 } */ 2027 2028 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2029 SCARG(uap, flags), retval); 2030 } 2031 2032 int 2033 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2034 { 2035 int error; 2036 fhandle_t *fh; 2037 struct vnode *vp; 2038 2039 /* 2040 * Must be super user 2041 */ 2042 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2043 0, NULL, NULL, NULL))) 2044 return (error); 2045 2046 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2047 if (error != 0) 2048 return error; 2049 2050 error = vfs_fhtovp(fh, &vp); 2051 vfs_copyinfh_free(fh); 2052 if (error != 0) 2053 return error; 2054 2055 error = vn_stat(vp, sb); 2056 vput(vp); 2057 return error; 2058 } 2059 2060 2061 /* ARGSUSED */ 2062 int 2063 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2064 { 2065 /* { 2066 syscallarg(const void *) fhp; 2067 syscallarg(size_t) fh_size; 2068 syscallarg(struct stat *) sb; 2069 } */ 2070 struct stat sb; 2071 int error; 2072 2073 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2074 if (error) 2075 return error; 2076 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2077 } 2078 2079 int 2080 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2081 int flags) 2082 { 2083 fhandle_t *fh; 2084 struct mount *mp; 2085 struct vnode *vp; 2086 int error; 2087 2088 /* 2089 * Must be super user 
2090 */ 2091 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2092 0, NULL, NULL, NULL))) 2093 return error; 2094 2095 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2096 if (error != 0) 2097 return error; 2098 2099 error = vfs_fhtovp(fh, &vp); 2100 vfs_copyinfh_free(fh); 2101 if (error != 0) 2102 return error; 2103 2104 mp = vp->v_mount; 2105 error = dostatvfs(mp, sb, l, flags, 1); 2106 vput(vp); 2107 return error; 2108 } 2109 2110 /* ARGSUSED */ 2111 int 2112 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2113 { 2114 /* { 2115 syscallarg(const void *) fhp; 2116 syscallarg(size_t) fh_size; 2117 syscallarg(struct statvfs *) buf; 2118 syscallarg(int) flags; 2119 } */ 2120 struct statvfs *sb = STATVFSBUF_GET(); 2121 int error; 2122 2123 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2124 SCARG(uap, flags)); 2125 if (error == 0) 2126 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2127 STATVFSBUF_PUT(sb); 2128 return error; 2129 } 2130 2131 /* 2132 * Create a special file. 
2133 */ 2134 /* ARGSUSED */ 2135 int 2136 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2137 register_t *retval) 2138 { 2139 /* { 2140 syscallarg(const char *) path; 2141 syscallarg(mode_t) mode; 2142 syscallarg(dev_t) dev; 2143 } */ 2144 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2145 SCARG(uap, dev), retval, UIO_USERSPACE); 2146 } 2147 2148 int 2149 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2150 register_t *retval) 2151 { 2152 /* { 2153 syscallarg(int) fd; 2154 syscallarg(const char *) path; 2155 syscallarg(mode_t) mode; 2156 syscallarg(int) pad; 2157 syscallarg(dev_t) dev; 2158 } */ 2159 2160 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2161 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2162 } 2163 2164 int 2165 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2166 register_t *retval, enum uio_seg seg) 2167 { 2168 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2169 } 2170 2171 int 2172 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2173 dev_t dev, register_t *retval, enum uio_seg seg) 2174 { 2175 struct proc *p = l->l_proc; 2176 struct vnode *vp; 2177 struct vattr vattr; 2178 int error, optype; 2179 struct pathbuf *pb; 2180 struct nameidata nd; 2181 const char *pathstring; 2182 2183 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2184 0, NULL, NULL, NULL)) != 0) 2185 return (error); 2186 2187 optype = VOP_MKNOD_DESCOFFSET; 2188 2189 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2190 if (error) { 2191 return error; 2192 } 2193 pathstring = pathbuf_stringcopy_get(pb); 2194 if (pathstring == NULL) { 2195 pathbuf_destroy(pb); 2196 return ENOMEM; 2197 } 2198 2199 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2200 2201 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2202 goto out; 2203 vp = nd.ni_vp; 2204 2205 if (vp != NULL) 2206 error = EEXIST; 2207 else { 2208 
vattr_null(&vattr); 2209 /* We will read cwdi->cwdi_cmask unlocked. */ 2210 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2211 vattr.va_rdev = dev; 2212 2213 switch (mode & S_IFMT) { 2214 case S_IFMT: /* used by badsect to flag bad sectors */ 2215 vattr.va_type = VBAD; 2216 break; 2217 case S_IFCHR: 2218 vattr.va_type = VCHR; 2219 break; 2220 case S_IFBLK: 2221 vattr.va_type = VBLK; 2222 break; 2223 case S_IFWHT: 2224 optype = VOP_WHITEOUT_DESCOFFSET; 2225 break; 2226 case S_IFREG: 2227 #if NVERIEXEC > 0 2228 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2229 O_CREAT); 2230 #endif /* NVERIEXEC > 0 */ 2231 vattr.va_type = VREG; 2232 vattr.va_rdev = VNOVAL; 2233 optype = VOP_CREATE_DESCOFFSET; 2234 break; 2235 default: 2236 error = EINVAL; 2237 break; 2238 } 2239 } 2240 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2241 && vattr.va_rdev == VNOVAL) 2242 error = EINVAL; 2243 if (!error) { 2244 switch (optype) { 2245 case VOP_WHITEOUT_DESCOFFSET: 2246 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2247 if (error) 2248 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2249 vput(nd.ni_dvp); 2250 break; 2251 2252 case VOP_MKNOD_DESCOFFSET: 2253 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2254 &nd.ni_cnd, &vattr); 2255 if (error == 0) 2256 vput(nd.ni_vp); 2257 break; 2258 2259 case VOP_CREATE_DESCOFFSET: 2260 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2261 &nd.ni_cnd, &vattr); 2262 if (error == 0) 2263 vput(nd.ni_vp); 2264 break; 2265 } 2266 } else { 2267 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2268 if (nd.ni_dvp == vp) 2269 vrele(nd.ni_dvp); 2270 else 2271 vput(nd.ni_dvp); 2272 if (vp) 2273 vrele(vp); 2274 } 2275 out: 2276 pathbuf_stringcopy_put(pb, pathstring); 2277 pathbuf_destroy(pb); 2278 return (error); 2279 } 2280 2281 /* 2282 * Create a named pipe. 
2283 */ 2284 /* ARGSUSED */ 2285 int 2286 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2287 { 2288 /* { 2289 syscallarg(const char *) path; 2290 syscallarg(int) mode; 2291 } */ 2292 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2293 } 2294 2295 int 2296 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2297 register_t *retval) 2298 { 2299 /* { 2300 syscallarg(int) fd; 2301 syscallarg(const char *) path; 2302 syscallarg(int) mode; 2303 } */ 2304 2305 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2306 SCARG(uap, mode)); 2307 } 2308 2309 static int 2310 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2311 { 2312 struct proc *p = l->l_proc; 2313 struct vattr vattr; 2314 int error; 2315 struct pathbuf *pb; 2316 struct nameidata nd; 2317 2318 error = pathbuf_copyin(path, &pb); 2319 if (error) { 2320 return error; 2321 } 2322 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2323 2324 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2325 pathbuf_destroy(pb); 2326 return error; 2327 } 2328 if (nd.ni_vp != NULL) { 2329 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2330 if (nd.ni_dvp == nd.ni_vp) 2331 vrele(nd.ni_dvp); 2332 else 2333 vput(nd.ni_dvp); 2334 vrele(nd.ni_vp); 2335 pathbuf_destroy(pb); 2336 return (EEXIST); 2337 } 2338 vattr_null(&vattr); 2339 vattr.va_type = VFIFO; 2340 /* We will read cwdi->cwdi_cmask unlocked. */ 2341 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2342 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2343 if (error == 0) 2344 vput(nd.ni_vp); 2345 pathbuf_destroy(pb); 2346 return (error); 2347 } 2348 2349 /* 2350 * Make a hard file link. 
2351 */ 2352 /* ARGSUSED */ 2353 int 2354 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2355 const char *link, int follow, register_t *retval) 2356 { 2357 struct vnode *vp; 2358 struct pathbuf *linkpb; 2359 struct nameidata nd; 2360 namei_simple_flags_t ns_flags; 2361 int error; 2362 2363 if (follow & AT_SYMLINK_FOLLOW) 2364 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2365 else 2366 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2367 2368 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2369 if (error != 0) 2370 return (error); 2371 error = pathbuf_copyin(link, &linkpb); 2372 if (error) { 2373 goto out1; 2374 } 2375 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2376 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2377 goto out2; 2378 if (nd.ni_vp) { 2379 error = EEXIST; 2380 goto abortop; 2381 } 2382 /* Prevent hard links on directories. */ 2383 if (vp->v_type == VDIR) { 2384 error = EPERM; 2385 goto abortop; 2386 } 2387 /* Prevent cross-mount operation. */ 2388 if (nd.ni_dvp->v_mount != vp->v_mount) { 2389 error = EXDEV; 2390 goto abortop; 2391 } 2392 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2393 out2: 2394 pathbuf_destroy(linkpb); 2395 out1: 2396 vrele(vp); 2397 return (error); 2398 abortop: 2399 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2400 if (nd.ni_dvp == nd.ni_vp) 2401 vrele(nd.ni_dvp); 2402 else 2403 vput(nd.ni_dvp); 2404 if (nd.ni_vp != NULL) 2405 vrele(nd.ni_vp); 2406 goto out2; 2407 } 2408 2409 int 2410 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2411 { 2412 /* { 2413 syscallarg(const char *) path; 2414 syscallarg(const char *) link; 2415 } */ 2416 const char *path = SCARG(uap, path); 2417 const char *link = SCARG(uap, link); 2418 2419 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2420 AT_SYMLINK_FOLLOW, retval); 2421 } 2422 2423 int 2424 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2425 register_t *retval) 2426 { 2427 /* { 2428 syscallarg(int) fd1; 2429 
syscallarg(const char *) name1; 2430 syscallarg(int) fd2; 2431 syscallarg(const char *) name2; 2432 syscallarg(int) flags; 2433 } */ 2434 int fd1 = SCARG(uap, fd1); 2435 const char *name1 = SCARG(uap, name1); 2436 int fd2 = SCARG(uap, fd2); 2437 const char *name2 = SCARG(uap, name2); 2438 int follow; 2439 2440 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2441 2442 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2443 } 2444 2445 2446 int 2447 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2448 { 2449 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2450 } 2451 2452 static int 2453 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2454 const char *link, enum uio_seg seg) 2455 { 2456 struct proc *p = curproc; 2457 struct vattr vattr; 2458 char *path; 2459 int error; 2460 struct pathbuf *linkpb; 2461 struct nameidata nd; 2462 2463 KASSERT(l != NULL || fdat == AT_FDCWD); 2464 2465 path = PNBUF_GET(); 2466 if (seg == UIO_USERSPACE) { 2467 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2468 goto out1; 2469 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2470 goto out1; 2471 } else { 2472 KASSERT(strlen(patharg) < MAXPATHLEN); 2473 strcpy(path, patharg); 2474 linkpb = pathbuf_create(link); 2475 if (linkpb == NULL) { 2476 error = ENOMEM; 2477 goto out1; 2478 } 2479 } 2480 ktrkuser("symlink-target", path, strlen(path)); 2481 2482 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2483 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2484 goto out2; 2485 if (nd.ni_vp) { 2486 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2487 if (nd.ni_dvp == nd.ni_vp) 2488 vrele(nd.ni_dvp); 2489 else 2490 vput(nd.ni_dvp); 2491 vrele(nd.ni_vp); 2492 error = EEXIST; 2493 goto out2; 2494 } 2495 vattr_null(&vattr); 2496 vattr.va_type = VLNK; 2497 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2498 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2499 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2500 if (error == 0) 2501 vput(nd.ni_vp); 2502 out2: 2503 pathbuf_destroy(linkpb); 2504 out1: 2505 PNBUF_PUT(path); 2506 return (error); 2507 } 2508 2509 /* 2510 * Make a symbolic link. 2511 */ 2512 /* ARGSUSED */ 2513 int 2514 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2515 { 2516 /* { 2517 syscallarg(const char *) path; 2518 syscallarg(const char *) link; 2519 } */ 2520 2521 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2522 UIO_USERSPACE); 2523 } 2524 2525 int 2526 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2527 register_t *retval) 2528 { 2529 /* { 2530 syscallarg(const char *) path1; 2531 syscallarg(int) fd; 2532 syscallarg(const char *) path2; 2533 } */ 2534 2535 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2536 SCARG(uap, path2), UIO_USERSPACE); 2537 } 2538 2539 /* 2540 * Delete a whiteout from the filesystem. 
2541 */ 2542 /* ARGSUSED */ 2543 int 2544 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2545 { 2546 /* { 2547 syscallarg(const char *) path; 2548 } */ 2549 int error; 2550 struct pathbuf *pb; 2551 struct nameidata nd; 2552 2553 error = pathbuf_copyin(SCARG(uap, path), &pb); 2554 if (error) { 2555 return error; 2556 } 2557 2558 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2559 error = namei(&nd); 2560 if (error) { 2561 pathbuf_destroy(pb); 2562 return (error); 2563 } 2564 2565 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2566 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2567 if (nd.ni_dvp == nd.ni_vp) 2568 vrele(nd.ni_dvp); 2569 else 2570 vput(nd.ni_dvp); 2571 if (nd.ni_vp) 2572 vrele(nd.ni_vp); 2573 pathbuf_destroy(pb); 2574 return (EEXIST); 2575 } 2576 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2577 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2578 vput(nd.ni_dvp); 2579 pathbuf_destroy(pb); 2580 return (error); 2581 } 2582 2583 /* 2584 * Delete a name from the filesystem. 
2585 */ 2586 /* ARGSUSED */ 2587 int 2588 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2589 { 2590 /* { 2591 syscallarg(const char *) path; 2592 } */ 2593 2594 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2595 } 2596 2597 int 2598 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2599 register_t *retval) 2600 { 2601 /* { 2602 syscallarg(int) fd; 2603 syscallarg(const char *) path; 2604 syscallarg(int) flag; 2605 } */ 2606 2607 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2608 SCARG(uap, flag), UIO_USERSPACE); 2609 } 2610 2611 int 2612 do_sys_unlink(const char *arg, enum uio_seg seg) 2613 { 2614 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2615 } 2616 2617 static int 2618 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2619 enum uio_seg seg) 2620 { 2621 struct vnode *vp; 2622 int error; 2623 struct pathbuf *pb; 2624 struct nameidata nd; 2625 const char *pathstring; 2626 2627 KASSERT(l != NULL || fdat == AT_FDCWD); 2628 2629 error = pathbuf_maybe_copyin(arg, seg, &pb); 2630 if (error) { 2631 return error; 2632 } 2633 pathstring = pathbuf_stringcopy_get(pb); 2634 if (pathstring == NULL) { 2635 pathbuf_destroy(pb); 2636 return ENOMEM; 2637 } 2638 2639 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2640 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2641 goto out; 2642 vp = nd.ni_vp; 2643 2644 /* 2645 * The root of a mounted filesystem cannot be deleted. 2646 */ 2647 if ((vp->v_vflag & VV_ROOT) != 0) { 2648 error = EBUSY; 2649 goto abort; 2650 } 2651 2652 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2653 error = EBUSY; 2654 goto abort; 2655 } 2656 2657 /* 2658 * No rmdir "." please. 
2659 */ 2660 if (nd.ni_dvp == vp) { 2661 error = EINVAL; 2662 goto abort; 2663 } 2664 2665 /* 2666 * AT_REMOVEDIR is required to remove a directory 2667 */ 2668 if (vp->v_type == VDIR) { 2669 if (!(flags & AT_REMOVEDIR)) { 2670 error = EPERM; 2671 goto abort; 2672 } else { 2673 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2674 goto out; 2675 } 2676 } 2677 2678 /* 2679 * Starting here we only deal with non directories. 2680 */ 2681 if (flags & AT_REMOVEDIR) { 2682 error = ENOTDIR; 2683 goto abort; 2684 } 2685 2686 2687 #if NVERIEXEC > 0 2688 /* Handle remove requests for veriexec entries. */ 2689 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2690 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2691 if (nd.ni_dvp == vp) 2692 vrele(nd.ni_dvp); 2693 else 2694 vput(nd.ni_dvp); 2695 vput(vp); 2696 goto out; 2697 } 2698 #endif /* NVERIEXEC > 0 */ 2699 2700 #ifdef FILEASSOC 2701 (void)fileassoc_file_delete(vp); 2702 #endif /* FILEASSOC */ 2703 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2704 goto out; 2705 2706 abort: 2707 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2708 if (nd.ni_dvp == vp) 2709 vrele(nd.ni_dvp); 2710 else 2711 vput(nd.ni_dvp); 2712 vput(vp); 2713 2714 out: 2715 pathbuf_stringcopy_put(pb, pathstring); 2716 pathbuf_destroy(pb); 2717 return (error); 2718 } 2719 2720 /* 2721 * Reposition read/write file offset. 
2722 */ 2723 int 2724 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2725 { 2726 /* { 2727 syscallarg(int) fd; 2728 syscallarg(int) pad; 2729 syscallarg(off_t) offset; 2730 syscallarg(int) whence; 2731 } */ 2732 kauth_cred_t cred = l->l_cred; 2733 file_t *fp; 2734 struct vnode *vp; 2735 struct vattr vattr; 2736 off_t newoff; 2737 int error, fd; 2738 2739 fd = SCARG(uap, fd); 2740 2741 if ((fp = fd_getfile(fd)) == NULL) 2742 return (EBADF); 2743 2744 vp = fp->f_data; 2745 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2746 error = ESPIPE; 2747 goto out; 2748 } 2749 2750 switch (SCARG(uap, whence)) { 2751 case SEEK_CUR: 2752 newoff = fp->f_offset + SCARG(uap, offset); 2753 break; 2754 case SEEK_END: 2755 vn_lock(vp, LK_SHARED | LK_RETRY); 2756 error = VOP_GETATTR(vp, &vattr, cred); 2757 VOP_UNLOCK(vp); 2758 if (error) { 2759 goto out; 2760 } 2761 newoff = SCARG(uap, offset) + vattr.va_size; 2762 break; 2763 case SEEK_SET: 2764 newoff = SCARG(uap, offset); 2765 break; 2766 default: 2767 error = EINVAL; 2768 goto out; 2769 } 2770 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2771 *(off_t *)retval = fp->f_offset = newoff; 2772 } 2773 out: 2774 fd_putfile(fd); 2775 return (error); 2776 } 2777 2778 /* 2779 * Positional read system call. 
2780 */ 2781 int 2782 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2783 { 2784 /* { 2785 syscallarg(int) fd; 2786 syscallarg(void *) buf; 2787 syscallarg(size_t) nbyte; 2788 syscallarg(off_t) offset; 2789 } */ 2790 file_t *fp; 2791 struct vnode *vp; 2792 off_t offset; 2793 int error, fd = SCARG(uap, fd); 2794 2795 if ((fp = fd_getfile(fd)) == NULL) 2796 return (EBADF); 2797 2798 if ((fp->f_flag & FREAD) == 0) { 2799 fd_putfile(fd); 2800 return (EBADF); 2801 } 2802 2803 vp = fp->f_data; 2804 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2805 error = ESPIPE; 2806 goto out; 2807 } 2808 2809 offset = SCARG(uap, offset); 2810 2811 /* 2812 * XXX This works because no file systems actually 2813 * XXX take any action on the seek operation. 2814 */ 2815 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2816 goto out; 2817 2818 /* dofileread() will unuse the descriptor for us */ 2819 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2820 &offset, 0, retval)); 2821 2822 out: 2823 fd_putfile(fd); 2824 return (error); 2825 } 2826 2827 /* 2828 * Positional scatter read system call. 2829 */ 2830 int 2831 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2832 { 2833 /* { 2834 syscallarg(int) fd; 2835 syscallarg(const struct iovec *) iovp; 2836 syscallarg(int) iovcnt; 2837 syscallarg(off_t) offset; 2838 } */ 2839 off_t offset = SCARG(uap, offset); 2840 2841 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2842 SCARG(uap, iovcnt), &offset, 0, retval); 2843 } 2844 2845 /* 2846 * Positional write system call. 
2847 */ 2848 int 2849 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2850 { 2851 /* { 2852 syscallarg(int) fd; 2853 syscallarg(const void *) buf; 2854 syscallarg(size_t) nbyte; 2855 syscallarg(off_t) offset; 2856 } */ 2857 file_t *fp; 2858 struct vnode *vp; 2859 off_t offset; 2860 int error, fd = SCARG(uap, fd); 2861 2862 if ((fp = fd_getfile(fd)) == NULL) 2863 return (EBADF); 2864 2865 if ((fp->f_flag & FWRITE) == 0) { 2866 fd_putfile(fd); 2867 return (EBADF); 2868 } 2869 2870 vp = fp->f_data; 2871 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2872 error = ESPIPE; 2873 goto out; 2874 } 2875 2876 offset = SCARG(uap, offset); 2877 2878 /* 2879 * XXX This works because no file systems actually 2880 * XXX take any action on the seek operation. 2881 */ 2882 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2883 goto out; 2884 2885 /* dofilewrite() will unuse the descriptor for us */ 2886 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2887 &offset, 0, retval)); 2888 2889 out: 2890 fd_putfile(fd); 2891 return (error); 2892 } 2893 2894 /* 2895 * Positional gather write system call. 2896 */ 2897 int 2898 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2899 { 2900 /* { 2901 syscallarg(int) fd; 2902 syscallarg(const struct iovec *) iovp; 2903 syscallarg(int) iovcnt; 2904 syscallarg(off_t) offset; 2905 } */ 2906 off_t offset = SCARG(uap, offset); 2907 2908 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2909 SCARG(uap, iovcnt), &offset, 0, retval); 2910 } 2911 2912 /* 2913 * Check access permissions. 
2914 */ 2915 int 2916 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2917 { 2918 /* { 2919 syscallarg(const char *) path; 2920 syscallarg(int) flags; 2921 } */ 2922 2923 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2924 SCARG(uap, flags), 0); 2925 } 2926 2927 int 2928 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2929 int mode, int flags) 2930 { 2931 kauth_cred_t cred; 2932 struct vnode *vp; 2933 int error, nd_flag, vmode; 2934 struct pathbuf *pb; 2935 struct nameidata nd; 2936 2937 CTASSERT(F_OK == 0); 2938 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2939 /* nonsense mode */ 2940 return EINVAL; 2941 } 2942 2943 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2944 if (flags & AT_SYMLINK_NOFOLLOW) 2945 nd_flag &= ~FOLLOW; 2946 2947 error = pathbuf_copyin(path, &pb); 2948 if (error) 2949 return error; 2950 2951 NDINIT(&nd, LOOKUP, nd_flag, pb); 2952 2953 /* Override default credentials */ 2954 cred = kauth_cred_dup(l->l_cred); 2955 if (!(flags & AT_EACCESS)) { 2956 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2957 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2958 } 2959 nd.ni_cnd.cn_cred = cred; 2960 2961 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2962 pathbuf_destroy(pb); 2963 goto out; 2964 } 2965 vp = nd.ni_vp; 2966 pathbuf_destroy(pb); 2967 2968 /* Flags == 0 means only check for existence. 
*/ 2969 if (mode) { 2970 vmode = 0; 2971 if (mode & R_OK) 2972 vmode |= VREAD; 2973 if (mode & W_OK) 2974 vmode |= VWRITE; 2975 if (mode & X_OK) 2976 vmode |= VEXEC; 2977 2978 error = VOP_ACCESS(vp, vmode, cred); 2979 if (!error && (vmode & VWRITE)) 2980 error = vn_writechk(vp); 2981 } 2982 vput(vp); 2983 out: 2984 kauth_cred_free(cred); 2985 return (error); 2986 } 2987 2988 int 2989 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 2990 register_t *retval) 2991 { 2992 /* { 2993 syscallarg(int) fd; 2994 syscallarg(const char *) path; 2995 syscallarg(int) amode; 2996 syscallarg(int) flag; 2997 } */ 2998 2999 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3000 SCARG(uap, amode), SCARG(uap, flag)); 3001 } 3002 3003 /* 3004 * Common code for all sys_stat functions, including compat versions. 3005 */ 3006 int 3007 do_sys_stat(const char *userpath, unsigned int nd_flag, 3008 struct stat *sb) 3009 { 3010 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3011 } 3012 3013 int 3014 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3015 unsigned int nd_flag, struct stat *sb) 3016 { 3017 int error; 3018 struct pathbuf *pb; 3019 struct nameidata nd; 3020 3021 KASSERT(l != NULL || fdat == AT_FDCWD); 3022 3023 error = pathbuf_copyin(userpath, &pb); 3024 if (error) { 3025 return error; 3026 } 3027 3028 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3029 3030 error = fd_nameiat(l, fdat, &nd); 3031 if (error != 0) { 3032 pathbuf_destroy(pb); 3033 return error; 3034 } 3035 error = vn_stat(nd.ni_vp, sb); 3036 vput(nd.ni_vp); 3037 pathbuf_destroy(pb); 3038 return error; 3039 } 3040 3041 /* 3042 * Get file status; this version follows links. 
3043 */ 3044 /* ARGSUSED */ 3045 int 3046 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3047 { 3048 /* { 3049 syscallarg(const char *) path; 3050 syscallarg(struct stat *) ub; 3051 } */ 3052 struct stat sb; 3053 int error; 3054 3055 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3056 if (error) 3057 return error; 3058 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3059 } 3060 3061 /* 3062 * Get file status; this version does not follow links. 3063 */ 3064 /* ARGSUSED */ 3065 int 3066 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3067 { 3068 /* { 3069 syscallarg(const char *) path; 3070 syscallarg(struct stat *) ub; 3071 } */ 3072 struct stat sb; 3073 int error; 3074 3075 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3076 if (error) 3077 return error; 3078 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3079 } 3080 3081 int 3082 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3083 register_t *retval) 3084 { 3085 /* { 3086 syscallarg(int) fd; 3087 syscallarg(const char *) path; 3088 syscallarg(struct stat *) buf; 3089 syscallarg(int) flag; 3090 } */ 3091 unsigned int nd_flag; 3092 struct stat sb; 3093 int error; 3094 3095 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3096 nd_flag = NOFOLLOW; 3097 else 3098 nd_flag = FOLLOW; 3099 3100 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3101 &sb); 3102 if (error) 3103 return error; 3104 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3105 } 3106 3107 /* 3108 * Get configurable pathname variables. 
3109 */ 3110 /* ARGSUSED */ 3111 int 3112 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3113 { 3114 /* { 3115 syscallarg(const char *) path; 3116 syscallarg(int) name; 3117 } */ 3118 int error; 3119 struct pathbuf *pb; 3120 struct nameidata nd; 3121 3122 error = pathbuf_copyin(SCARG(uap, path), &pb); 3123 if (error) { 3124 return error; 3125 } 3126 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3127 if ((error = namei(&nd)) != 0) { 3128 pathbuf_destroy(pb); 3129 return (error); 3130 } 3131 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3132 vput(nd.ni_vp); 3133 pathbuf_destroy(pb); 3134 return (error); 3135 } 3136 3137 /* 3138 * Return target name of a symbolic link. 3139 */ 3140 /* ARGSUSED */ 3141 int 3142 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3143 register_t *retval) 3144 { 3145 /* { 3146 syscallarg(const char *) path; 3147 syscallarg(char *) buf; 3148 syscallarg(size_t) count; 3149 } */ 3150 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3151 SCARG(uap, buf), SCARG(uap, count), retval); 3152 } 3153 3154 static int 3155 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3156 size_t count, register_t *retval) 3157 { 3158 struct vnode *vp; 3159 struct iovec aiov; 3160 struct uio auio; 3161 int error; 3162 struct pathbuf *pb; 3163 struct nameidata nd; 3164 3165 error = pathbuf_copyin(path, &pb); 3166 if (error) { 3167 return error; 3168 } 3169 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3170 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3171 pathbuf_destroy(pb); 3172 return error; 3173 } 3174 vp = nd.ni_vp; 3175 pathbuf_destroy(pb); 3176 if (vp->v_type != VLNK) 3177 error = EINVAL; 3178 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3179 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3180 aiov.iov_base = buf; 3181 aiov.iov_len = count; 3182 auio.uio_iov = &aiov; 3183 auio.uio_iovcnt = 1; 3184 auio.uio_offset = 0; 3185 
auio.uio_rw = UIO_READ; 3186 KASSERT(l == curlwp); 3187 auio.uio_vmspace = l->l_proc->p_vmspace; 3188 auio.uio_resid = count; 3189 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3190 *retval = count - auio.uio_resid; 3191 } 3192 vput(vp); 3193 return (error); 3194 } 3195 3196 int 3197 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3198 register_t *retval) 3199 { 3200 /* { 3201 syscallarg(int) fd; 3202 syscallarg(const char *) path; 3203 syscallarg(char *) buf; 3204 syscallarg(size_t) bufsize; 3205 } */ 3206 3207 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3208 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3209 } 3210 3211 /* 3212 * Change flags of a file given a path name. 3213 */ 3214 /* ARGSUSED */ 3215 int 3216 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3217 { 3218 /* { 3219 syscallarg(const char *) path; 3220 syscallarg(u_long) flags; 3221 } */ 3222 struct vnode *vp; 3223 int error; 3224 3225 error = namei_simple_user(SCARG(uap, path), 3226 NSM_FOLLOW_TRYEMULROOT, &vp); 3227 if (error != 0) 3228 return (error); 3229 error = change_flags(vp, SCARG(uap, flags), l); 3230 vput(vp); 3231 return (error); 3232 } 3233 3234 /* 3235 * Change flags of a file given a file descriptor. 3236 */ 3237 /* ARGSUSED */ 3238 int 3239 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3240 { 3241 /* { 3242 syscallarg(int) fd; 3243 syscallarg(u_long) flags; 3244 } */ 3245 struct vnode *vp; 3246 file_t *fp; 3247 int error; 3248 3249 /* fd_getvnode() will use the descriptor for us */ 3250 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3251 return (error); 3252 vp = fp->f_data; 3253 error = change_flags(vp, SCARG(uap, flags), l); 3254 VOP_UNLOCK(vp); 3255 fd_putfile(SCARG(uap, fd)); 3256 return (error); 3257 } 3258 3259 /* 3260 * Change flags of a file given a path name; this version does 3261 * not follow links. 
3262 */ 3263 int 3264 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3265 { 3266 /* { 3267 syscallarg(const char *) path; 3268 syscallarg(u_long) flags; 3269 } */ 3270 struct vnode *vp; 3271 int error; 3272 3273 error = namei_simple_user(SCARG(uap, path), 3274 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3275 if (error != 0) 3276 return (error); 3277 error = change_flags(vp, SCARG(uap, flags), l); 3278 vput(vp); 3279 return (error); 3280 } 3281 3282 /* 3283 * Common routine to change flags of a file. 3284 */ 3285 int 3286 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3287 { 3288 struct vattr vattr; 3289 int error; 3290 3291 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3292 3293 vattr_null(&vattr); 3294 vattr.va_flags = flags; 3295 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3296 3297 return (error); 3298 } 3299 3300 /* 3301 * Change mode of a file given path name; this version follows links. 3302 */ 3303 /* ARGSUSED */ 3304 int 3305 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3306 { 3307 /* { 3308 syscallarg(const char *) path; 3309 syscallarg(int) mode; 3310 } */ 3311 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3312 SCARG(uap, mode), 0); 3313 } 3314 3315 int 3316 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3317 { 3318 int error; 3319 struct vnode *vp; 3320 namei_simple_flags_t ns_flag; 3321 3322 if (flags & AT_SYMLINK_NOFOLLOW) 3323 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3324 else 3325 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3326 3327 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3328 if (error != 0) 3329 return error; 3330 3331 error = change_mode(vp, mode, l); 3332 3333 vrele(vp); 3334 3335 return (error); 3336 } 3337 3338 /* 3339 * Change mode of a file given a file descriptor. 
3340 */ 3341 /* ARGSUSED */ 3342 int 3343 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3344 { 3345 /* { 3346 syscallarg(int) fd; 3347 syscallarg(int) mode; 3348 } */ 3349 file_t *fp; 3350 int error; 3351 3352 /* fd_getvnode() will use the descriptor for us */ 3353 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3354 return (error); 3355 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3356 fd_putfile(SCARG(uap, fd)); 3357 return (error); 3358 } 3359 3360 int 3361 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3362 register_t *retval) 3363 { 3364 /* { 3365 syscallarg(int) fd; 3366 syscallarg(const char *) path; 3367 syscallarg(int) mode; 3368 syscallarg(int) flag; 3369 } */ 3370 3371 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3372 SCARG(uap, mode), SCARG(uap, flag)); 3373 } 3374 3375 /* 3376 * Change mode of a file given path name; this version does not follow links. 3377 */ 3378 /* ARGSUSED */ 3379 int 3380 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3381 { 3382 /* { 3383 syscallarg(const char *) path; 3384 syscallarg(int) mode; 3385 } */ 3386 int error; 3387 struct vnode *vp; 3388 3389 error = namei_simple_user(SCARG(uap, path), 3390 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3391 if (error != 0) 3392 return (error); 3393 3394 error = change_mode(vp, SCARG(uap, mode), l); 3395 3396 vrele(vp); 3397 return (error); 3398 } 3399 3400 /* 3401 * Common routine to set mode given a vnode. 3402 */ 3403 static int 3404 change_mode(struct vnode *vp, int mode, struct lwp *l) 3405 { 3406 struct vattr vattr; 3407 int error; 3408 3409 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3410 vattr_null(&vattr); 3411 vattr.va_mode = mode & ALLPERMS; 3412 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3413 VOP_UNLOCK(vp); 3414 return (error); 3415 } 3416 3417 /* 3418 * Set ownership given a path name; this version follows links. 
3419 */ 3420 /* ARGSUSED */ 3421 int 3422 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3423 { 3424 /* { 3425 syscallarg(const char *) path; 3426 syscallarg(uid_t) uid; 3427 syscallarg(gid_t) gid; 3428 } */ 3429 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3430 SCARG(uap, gid), 0); 3431 } 3432 3433 int 3434 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3435 gid_t gid, int flags) 3436 { 3437 int error; 3438 struct vnode *vp; 3439 namei_simple_flags_t ns_flag; 3440 3441 if (flags & AT_SYMLINK_NOFOLLOW) 3442 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3443 else 3444 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3445 3446 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3447 if (error != 0) 3448 return error; 3449 3450 error = change_owner(vp, uid, gid, l, 0); 3451 3452 vrele(vp); 3453 3454 return (error); 3455 } 3456 3457 /* 3458 * Set ownership given a path name; this version follows links. 3459 * Provides POSIX semantics. 3460 */ 3461 /* ARGSUSED */ 3462 int 3463 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3464 { 3465 /* { 3466 syscallarg(const char *) path; 3467 syscallarg(uid_t) uid; 3468 syscallarg(gid_t) gid; 3469 } */ 3470 int error; 3471 struct vnode *vp; 3472 3473 error = namei_simple_user(SCARG(uap, path), 3474 NSM_FOLLOW_TRYEMULROOT, &vp); 3475 if (error != 0) 3476 return (error); 3477 3478 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3479 3480 vrele(vp); 3481 return (error); 3482 } 3483 3484 /* 3485 * Set ownership given a file descriptor. 
3486 */ 3487 /* ARGSUSED */ 3488 int 3489 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3490 { 3491 /* { 3492 syscallarg(int) fd; 3493 syscallarg(uid_t) uid; 3494 syscallarg(gid_t) gid; 3495 } */ 3496 int error; 3497 file_t *fp; 3498 3499 /* fd_getvnode() will use the descriptor for us */ 3500 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3501 return (error); 3502 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3503 l, 0); 3504 fd_putfile(SCARG(uap, fd)); 3505 return (error); 3506 } 3507 3508 int 3509 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3510 register_t *retval) 3511 { 3512 /* { 3513 syscallarg(int) fd; 3514 syscallarg(const char *) path; 3515 syscallarg(uid_t) owner; 3516 syscallarg(gid_t) group; 3517 syscallarg(int) flag; 3518 } */ 3519 3520 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3521 SCARG(uap, owner), SCARG(uap, group), 3522 SCARG(uap, flag)); 3523 } 3524 3525 /* 3526 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3527 */ 3528 /* ARGSUSED */ 3529 int 3530 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3531 { 3532 /* { 3533 syscallarg(int) fd; 3534 syscallarg(uid_t) uid; 3535 syscallarg(gid_t) gid; 3536 } */ 3537 int error; 3538 file_t *fp; 3539 3540 /* fd_getvnode() will use the descriptor for us */ 3541 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3542 return (error); 3543 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3544 l, 1); 3545 fd_putfile(SCARG(uap, fd)); 3546 return (error); 3547 } 3548 3549 /* 3550 * Set ownership given a path name; this version does not follow links. 
3551 */ 3552 /* ARGSUSED */ 3553 int 3554 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3555 { 3556 /* { 3557 syscallarg(const char *) path; 3558 syscallarg(uid_t) uid; 3559 syscallarg(gid_t) gid; 3560 } */ 3561 int error; 3562 struct vnode *vp; 3563 3564 error = namei_simple_user(SCARG(uap, path), 3565 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3566 if (error != 0) 3567 return (error); 3568 3569 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3570 3571 vrele(vp); 3572 return (error); 3573 } 3574 3575 /* 3576 * Set ownership given a path name; this version does not follow links. 3577 * Provides POSIX/XPG semantics. 3578 */ 3579 /* ARGSUSED */ 3580 int 3581 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3582 { 3583 /* { 3584 syscallarg(const char *) path; 3585 syscallarg(uid_t) uid; 3586 syscallarg(gid_t) gid; 3587 } */ 3588 int error; 3589 struct vnode *vp; 3590 3591 error = namei_simple_user(SCARG(uap, path), 3592 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3593 if (error != 0) 3594 return (error); 3595 3596 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3597 3598 vrele(vp); 3599 return (error); 3600 } 3601 3602 /* 3603 * Common routine to set ownership given a vnode. 3604 */ 3605 static int 3606 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3607 int posix_semantics) 3608 { 3609 struct vattr vattr; 3610 mode_t newmode; 3611 int error; 3612 3613 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3614 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3615 goto out; 3616 3617 #define CHANGED(x) ((int)(x) != -1) 3618 newmode = vattr.va_mode; 3619 if (posix_semantics) { 3620 /* 3621 * POSIX/XPG semantics: if the caller is not the super-user, 3622 * clear set-user-id and set-group-id bits. 
Both POSIX and 3623 * the XPG consider the behaviour for calls by the super-user 3624 * implementation-defined; we leave the set-user-id and set- 3625 * group-id settings intact in that case. 3626 */ 3627 if (vattr.va_mode & S_ISUID) { 3628 if (kauth_authorize_vnode(l->l_cred, 3629 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3630 newmode &= ~S_ISUID; 3631 } 3632 if (vattr.va_mode & S_ISGID) { 3633 if (kauth_authorize_vnode(l->l_cred, 3634 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3635 newmode &= ~S_ISGID; 3636 } 3637 } else { 3638 /* 3639 * NetBSD semantics: when changing owner and/or group, 3640 * clear the respective bit(s). 3641 */ 3642 if (CHANGED(uid)) 3643 newmode &= ~S_ISUID; 3644 if (CHANGED(gid)) 3645 newmode &= ~S_ISGID; 3646 } 3647 /* Update va_mode iff altered. */ 3648 if (vattr.va_mode == newmode) 3649 newmode = VNOVAL; 3650 3651 vattr_null(&vattr); 3652 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3653 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3654 vattr.va_mode = newmode; 3655 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3656 #undef CHANGED 3657 3658 out: 3659 VOP_UNLOCK(vp); 3660 return (error); 3661 } 3662 3663 /* 3664 * Set the access and modification times given a path name; this 3665 * version follows links. 3666 */ 3667 /* ARGSUSED */ 3668 int 3669 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3670 register_t *retval) 3671 { 3672 /* { 3673 syscallarg(const char *) path; 3674 syscallarg(const struct timeval *) tptr; 3675 } */ 3676 3677 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3678 SCARG(uap, tptr), UIO_USERSPACE); 3679 } 3680 3681 /* 3682 * Set the access and modification times given a file descriptor. 
3683 */ 3684 /* ARGSUSED */ 3685 int 3686 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3687 register_t *retval) 3688 { 3689 /* { 3690 syscallarg(int) fd; 3691 syscallarg(const struct timeval *) tptr; 3692 } */ 3693 int error; 3694 file_t *fp; 3695 3696 /* fd_getvnode() will use the descriptor for us */ 3697 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3698 return (error); 3699 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3700 UIO_USERSPACE); 3701 fd_putfile(SCARG(uap, fd)); 3702 return (error); 3703 } 3704 3705 int 3706 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3707 register_t *retval) 3708 { 3709 /* { 3710 syscallarg(int) fd; 3711 syscallarg(const struct timespec *) tptr; 3712 } */ 3713 int error; 3714 file_t *fp; 3715 3716 /* fd_getvnode() will use the descriptor for us */ 3717 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3718 return (error); 3719 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0, 3720 SCARG(uap, tptr), UIO_USERSPACE); 3721 fd_putfile(SCARG(uap, fd)); 3722 return (error); 3723 } 3724 3725 /* 3726 * Set the access and modification times given a path name; this 3727 * version does not follow links. 3728 */ 3729 int 3730 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3731 register_t *retval) 3732 { 3733 /* { 3734 syscallarg(const char *) path; 3735 syscallarg(const struct timeval *) tptr; 3736 } */ 3737 3738 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3739 SCARG(uap, tptr), UIO_USERSPACE); 3740 } 3741 3742 int 3743 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3744 register_t *retval) 3745 { 3746 /* { 3747 syscallarg(int) fd; 3748 syscallarg(const char *) path; 3749 syscallarg(const struct timespec *) tptr; 3750 syscallarg(int) flag; 3751 } */ 3752 int follow; 3753 const struct timespec *tptr; 3754 int error; 3755 3756 tptr = SCARG(uap, tptr); 3757 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 3758 3759 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3760 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3761 3762 return error; 3763 } 3764 3765 /* 3766 * Common routine to set access and modification times given a vnode. 3767 */ 3768 int 3769 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3770 const struct timespec *tptr, enum uio_seg seg) 3771 { 3772 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3773 } 3774 3775 int 3776 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3777 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3778 { 3779 struct vattr vattr; 3780 int error, dorele = 0; 3781 namei_simple_flags_t sflags; 3782 bool vanull, setbirthtime; 3783 struct timespec ts[2]; 3784 3785 KASSERT(l != NULL || fdat == AT_FDCWD); 3786 3787 /* 3788 * I have checked all callers and they pass either FOLLOW, 3789 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3790 * is 0. More to the point, they don't pass anything else. 3791 * Let's keep it that way at least until the namei interfaces 3792 * are fully sanitized. 3793 */ 3794 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3795 sflags = (flag == FOLLOW) ? 
3796 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3797 3798 if (tptr == NULL) { 3799 vanull = true; 3800 nanotime(&ts[0]); 3801 ts[1] = ts[0]; 3802 } else { 3803 vanull = false; 3804 if (seg != UIO_SYSSPACE) { 3805 error = copyin(tptr, ts, sizeof (ts)); 3806 if (error != 0) 3807 return error; 3808 } else { 3809 ts[0] = tptr[0]; 3810 ts[1] = tptr[1]; 3811 } 3812 } 3813 3814 if (ts[0].tv_nsec == UTIME_NOW) { 3815 nanotime(&ts[0]); 3816 if (ts[1].tv_nsec == UTIME_NOW) { 3817 vanull = true; 3818 ts[1] = ts[0]; 3819 } 3820 } else if (ts[1].tv_nsec == UTIME_NOW) 3821 nanotime(&ts[1]); 3822 3823 if (vp == NULL) { 3824 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3825 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3826 if (error != 0) 3827 return error; 3828 dorele = 1; 3829 } 3830 3831 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3832 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3833 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3834 vattr_null(&vattr); 3835 3836 if (ts[0].tv_nsec != UTIME_OMIT) 3837 vattr.va_atime = ts[0]; 3838 3839 if (ts[1].tv_nsec != UTIME_OMIT) { 3840 vattr.va_mtime = ts[1]; 3841 if (setbirthtime) 3842 vattr.va_birthtime = ts[1]; 3843 } 3844 3845 if (vanull) 3846 vattr.va_vaflags |= VA_UTIMES_NULL; 3847 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3848 VOP_UNLOCK(vp); 3849 3850 if (dorele != 0) 3851 vrele(vp); 3852 3853 return error; 3854 } 3855 3856 int 3857 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3858 const struct timeval *tptr, enum uio_seg seg) 3859 { 3860 struct timespec ts[2]; 3861 struct timespec *tsptr = NULL; 3862 int error; 3863 3864 if (tptr != NULL) { 3865 struct timeval tv[2]; 3866 3867 if (seg != UIO_SYSSPACE) { 3868 error = copyin(tptr, tv, sizeof (tv)); 3869 if (error != 0) 3870 return error; 3871 tptr = tv; 3872 } 3873 3874 if ((tv[0].tv_usec == UTIME_NOW) || 3875 (tv[0].tv_usec == UTIME_OMIT)) 3876 ts[0].tv_nsec = tv[0].tv_usec; 3877 else 3878 
TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3879 3880 if ((tv[1].tv_usec == UTIME_NOW) || 3881 (tv[1].tv_usec == UTIME_OMIT)) 3882 ts[1].tv_nsec = tv[1].tv_usec; 3883 else 3884 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3885 3886 tsptr = &ts[0]; 3887 } 3888 3889 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3890 } 3891 3892 /* 3893 * Truncate a file given its path name. 3894 */ 3895 /* ARGSUSED */ 3896 int 3897 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3898 { 3899 /* { 3900 syscallarg(const char *) path; 3901 syscallarg(int) pad; 3902 syscallarg(off_t) length; 3903 } */ 3904 struct vnode *vp; 3905 struct vattr vattr; 3906 int error; 3907 3908 error = namei_simple_user(SCARG(uap, path), 3909 NSM_FOLLOW_TRYEMULROOT, &vp); 3910 if (error != 0) 3911 return (error); 3912 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3913 if (vp->v_type == VDIR) 3914 error = EISDIR; 3915 else if ((error = vn_writechk(vp)) == 0 && 3916 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3917 vattr_null(&vattr); 3918 vattr.va_size = SCARG(uap, length); 3919 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3920 } 3921 vput(vp); 3922 return (error); 3923 } 3924 3925 /* 3926 * Truncate a file given a file descriptor. 
3927 */ 3928 /* ARGSUSED */ 3929 int 3930 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3931 { 3932 /* { 3933 syscallarg(int) fd; 3934 syscallarg(int) pad; 3935 syscallarg(off_t) length; 3936 } */ 3937 struct vattr vattr; 3938 struct vnode *vp; 3939 file_t *fp; 3940 int error; 3941 3942 /* fd_getvnode() will use the descriptor for us */ 3943 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3944 return (error); 3945 if ((fp->f_flag & FWRITE) == 0) { 3946 error = EINVAL; 3947 goto out; 3948 } 3949 vp = fp->f_data; 3950 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3951 if (vp->v_type == VDIR) 3952 error = EISDIR; 3953 else if ((error = vn_writechk(vp)) == 0) { 3954 vattr_null(&vattr); 3955 vattr.va_size = SCARG(uap, length); 3956 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3957 } 3958 VOP_UNLOCK(vp); 3959 out: 3960 fd_putfile(SCARG(uap, fd)); 3961 return (error); 3962 } 3963 3964 /* 3965 * Sync an open file. 3966 */ 3967 /* ARGSUSED */ 3968 int 3969 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3970 { 3971 /* { 3972 syscallarg(int) fd; 3973 } */ 3974 struct vnode *vp; 3975 file_t *fp; 3976 int error; 3977 3978 /* fd_getvnode() will use the descriptor for us */ 3979 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3980 return (error); 3981 vp = fp->f_data; 3982 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3983 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3984 VOP_UNLOCK(vp); 3985 fd_putfile(SCARG(uap, fd)); 3986 return (error); 3987 } 3988 3989 /* 3990 * Sync a range of file data. API modeled after that found in AIX. 3991 * 3992 * FDATASYNC indicates that we need only save enough metadata to be able 3993 * to re-read the written data. Note we duplicate AIX's requirement that 3994 * the file be open for writing. 
3995 */ 3996 /* ARGSUSED */ 3997 int 3998 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3999 { 4000 /* { 4001 syscallarg(int) fd; 4002 syscallarg(int) flags; 4003 syscallarg(off_t) start; 4004 syscallarg(off_t) length; 4005 } */ 4006 struct vnode *vp; 4007 file_t *fp; 4008 int flags, nflags; 4009 off_t s, e, len; 4010 int error; 4011 4012 /* fd_getvnode() will use the descriptor for us */ 4013 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4014 return (error); 4015 4016 if ((fp->f_flag & FWRITE) == 0) { 4017 error = EBADF; 4018 goto out; 4019 } 4020 4021 flags = SCARG(uap, flags); 4022 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4023 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4024 error = EINVAL; 4025 goto out; 4026 } 4027 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4028 if (flags & FDATASYNC) 4029 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4030 else 4031 nflags = FSYNC_WAIT; 4032 if (flags & FDISKSYNC) 4033 nflags |= FSYNC_CACHE; 4034 4035 len = SCARG(uap, length); 4036 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4037 if (len) { 4038 s = SCARG(uap, start); 4039 e = s + len; 4040 if (e < s) { 4041 error = EINVAL; 4042 goto out; 4043 } 4044 } else { 4045 e = 0; 4046 s = 0; 4047 } 4048 4049 vp = fp->f_data; 4050 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4051 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4052 VOP_UNLOCK(vp); 4053 out: 4054 fd_putfile(SCARG(uap, fd)); 4055 return (error); 4056 } 4057 4058 /* 4059 * Sync the data of an open file. 
4060 */ 4061 /* ARGSUSED */ 4062 int 4063 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4064 { 4065 /* { 4066 syscallarg(int) fd; 4067 } */ 4068 struct vnode *vp; 4069 file_t *fp; 4070 int error; 4071 4072 /* fd_getvnode() will use the descriptor for us */ 4073 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4074 return (error); 4075 if ((fp->f_flag & FWRITE) == 0) { 4076 fd_putfile(SCARG(uap, fd)); 4077 return (EBADF); 4078 } 4079 vp = fp->f_data; 4080 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4081 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4082 VOP_UNLOCK(vp); 4083 fd_putfile(SCARG(uap, fd)); 4084 return (error); 4085 } 4086 4087 /* 4088 * Rename files, (standard) BSD semantics frontend. 4089 */ 4090 /* ARGSUSED */ 4091 int 4092 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4093 { 4094 /* { 4095 syscallarg(const char *) from; 4096 syscallarg(const char *) to; 4097 } */ 4098 4099 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4100 SCARG(uap, to), UIO_USERSPACE, 0)); 4101 } 4102 4103 int 4104 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4105 register_t *retval) 4106 { 4107 /* { 4108 syscallarg(int) fromfd; 4109 syscallarg(const char *) from; 4110 syscallarg(int) tofd; 4111 syscallarg(const char *) to; 4112 } */ 4113 4114 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4115 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4116 } 4117 4118 /* 4119 * Rename files, POSIX semantics frontend. 4120 */ 4121 /* ARGSUSED */ 4122 int 4123 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4124 { 4125 /* { 4126 syscallarg(const char *) from; 4127 syscallarg(const char *) to; 4128 } */ 4129 4130 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4131 SCARG(uap, to), UIO_USERSPACE, 1)); 4132 } 4133 4134 /* 4135 * Rename files. 
Source and destination must either both be directories, 4136 * or both not be directories. If target is a directory, it must be empty. 4137 * If `from' and `to' refer to the same object, the value of the `retain' 4138 * argument is used to determine whether `from' will be 4139 * 4140 * (retain == 0) deleted unless `from' and `to' refer to the same 4141 * object in the file system's name space (BSD). 4142 * (retain == 1) always retained (POSIX). 4143 * 4144 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4145 */ 4146 int 4147 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4148 { 4149 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4150 } 4151 4152 static int 4153 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4154 const char *to, enum uio_seg seg, int retain) 4155 { 4156 struct pathbuf *fpb, *tpb; 4157 struct nameidata fnd, tnd; 4158 struct vnode *fdvp, *fvp; 4159 struct vnode *tdvp, *tvp; 4160 struct mount *mp, *tmp; 4161 int error; 4162 4163 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4164 4165 error = pathbuf_maybe_copyin(from, seg, &fpb); 4166 if (error) 4167 goto out0; 4168 KASSERT(fpb != NULL); 4169 4170 error = pathbuf_maybe_copyin(to, seg, &tpb); 4171 if (error) 4172 goto out1; 4173 KASSERT(tpb != NULL); 4174 4175 /* 4176 * Lookup from. 4177 * 4178 * XXX LOCKPARENT is wrong because we don't actually want it 4179 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4180 * insane, so for the time being we need to leave it like this. 4181 */ 4182 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4183 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4184 goto out2; 4185 4186 /* 4187 * Pull out the important results of the lookup, fdvp and fvp. 4188 * Of course, fvp is bogus because we're about to unlock fdvp. 
4189 */ 4190 fdvp = fnd.ni_dvp; 4191 fvp = fnd.ni_vp; 4192 KASSERT(fdvp != NULL); 4193 KASSERT(fvp != NULL); 4194 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4195 4196 /* 4197 * Make sure neither fdvp nor fvp is locked. 4198 */ 4199 if (fdvp != fvp) 4200 VOP_UNLOCK(fdvp); 4201 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4202 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4203 4204 /* 4205 * Reject renaming `.' and `..'. Can't do this until after 4206 * namei because we need namei's parsing to find the final 4207 * component name. (namei should just leave us with the final 4208 * component name and not look it up itself, but anyway...) 4209 * 4210 * This was here before because we used to relookup from 4211 * instead of to and relookup requires the caller to check 4212 * this, but now file systems may depend on this check, so we 4213 * must retain it until the file systems are all rototilled. 4214 */ 4215 if (((fnd.ni_cnd.cn_namelen == 1) && 4216 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4217 ((fnd.ni_cnd.cn_namelen == 2) && 4218 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4219 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4220 error = EINVAL; /* XXX EISDIR? */ 4221 goto abort0; 4222 } 4223 4224 /* 4225 * Lookup to. 4226 * 4227 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4228 * fvp here to decide whether to add CREATEDIR is a load of 4229 * bollocks because fvp might be the wrong node by now, since 4230 * fdvp is unlocked. 4231 * 4232 * XXX Why not pass CREATEDIR always? 4233 */ 4234 NDINIT(&tnd, RENAME, 4235 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4236 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4237 tpb); 4238 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4239 goto abort0; 4240 4241 /* 4242 * Pull out the important results of the lookup, tdvp and tvp. 4243 * Of course, tvp is bogus because we're about to unlock tdvp. 
4244 */ 4245 tdvp = tnd.ni_dvp; 4246 tvp = tnd.ni_vp; 4247 KASSERT(tdvp != NULL); 4248 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4249 4250 /* 4251 * Make sure neither tdvp nor tvp is locked. 4252 */ 4253 if (tdvp != tvp) 4254 VOP_UNLOCK(tdvp); 4255 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4256 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4257 4258 /* 4259 * Reject renaming onto `.' or `..'. relookup is unhappy with 4260 * these, which is why we must do this here. Once upon a time 4261 * we relooked up from instead of to, and consequently didn't 4262 * need this check, but now that we relookup to instead of 4263 * from, we need this; and we shall need it forever forward 4264 * until the VOP_RENAME protocol changes, because file systems 4265 * will no doubt begin to depend on this check. 4266 */ 4267 if (((tnd.ni_cnd.cn_namelen == 1) && 4268 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4269 ((tnd.ni_cnd.cn_namelen == 2) && 4270 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4271 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4272 error = EINVAL; /* XXX EISDIR? */ 4273 goto abort1; 4274 } 4275 4276 /* 4277 * Get the mount point. If the file system has been unmounted, 4278 * which it may be because we're not holding any vnode locks, 4279 * then v_mount will be NULL. We're not really supposed to 4280 * read v_mount without holding the vnode lock, but since we 4281 * have fdvp referenced, if fdvp->v_mount changes then at worst 4282 * it will be set to NULL, not changed to another mount point. 4283 * And, of course, since it is up to the file system to 4284 * determine the real lock order, we can't lock both fdvp and 4285 * tdvp at the same time. 4286 */ 4287 mp = fdvp->v_mount; 4288 if (mp == NULL) { 4289 error = ENOENT; 4290 goto abort1; 4291 } 4292 4293 /* 4294 * Make sure the mount points match. 
Again, although we don't 4295 * hold any vnode locks, the v_mount fields may change -- but 4296 * at worst they will change to NULL, so this will never become 4297 * a cross-device rename, because we hold vnode references. 4298 * 4299 * XXX Because nothing is locked and the compiler may reorder 4300 * things here, unmounting the file system at an inopportune 4301 * moment may cause rename to fail with ENXDEV when it really 4302 * should fail with ENOENT. 4303 */ 4304 tmp = tdvp->v_mount; 4305 if (tmp == NULL) { 4306 error = ENOENT; 4307 goto abort1; 4308 } 4309 4310 if (mp != tmp) { 4311 error = EXDEV; 4312 goto abort1; 4313 } 4314 4315 /* 4316 * Take the vfs rename lock to avoid cross-directory screw cases. 4317 * Nothing is locked currently, so taking this lock is safe. 4318 */ 4319 error = VFS_RENAMELOCK_ENTER(mp); 4320 if (error) 4321 goto abort1; 4322 4323 /* 4324 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4325 * and nothing is locked except for the vfs rename lock. 4326 * 4327 * The next step is a little rain dance to conform to the 4328 * insane lock protocol, even though it does nothing to ward 4329 * off race conditions. 4330 * 4331 * We need tdvp and tvp to be locked. However, because we have 4332 * unlocked tdvp in order to hold no locks while we take the 4333 * vfs rename lock, tvp may be wrong here, and we can't safely 4334 * lock it even if the sensible file systems will just unlock 4335 * it straight away. Consequently, we must lock tdvp and then 4336 * relookup tvp to get it locked. 4337 * 4338 * Finally, because the VOP_RENAME protocol is brain-damaged 4339 * and various file systems insanely depend on the semantics of 4340 * this brain damage, the lookup of to must be the last lookup 4341 * before VOP_RENAME. 
4342 */ 4343 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4344 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4345 if (error) 4346 goto abort2; 4347 4348 /* 4349 * Drop the old tvp and pick up the new one -- which might be 4350 * the same, but that doesn't matter to us. After this, tdvp 4351 * and tvp should both be locked. 4352 */ 4353 if (tvp != NULL) 4354 vrele(tvp); 4355 tvp = tnd.ni_vp; 4356 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4357 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4358 4359 /* 4360 * The old do_sys_rename had various consistency checks here 4361 * involving fvp and tvp. fvp is bogus already here, and tvp 4362 * will become bogus soon in any sensible file system, so the 4363 * only purpose in putting these checks here is to give lip 4364 * service to these screw cases and to acknowledge that they 4365 * exist, not actually to handle them, but here you go 4366 * anyway... 4367 */ 4368 4369 /* 4370 * Acknowledge that directories and non-directories aren't 4371 * suposed to mix. 4372 */ 4373 if (tvp != NULL) { 4374 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4375 error = ENOTDIR; 4376 goto abort3; 4377 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4378 error = EISDIR; 4379 goto abort3; 4380 } 4381 } 4382 4383 /* 4384 * Acknowledge some random screw case, among the dozens that 4385 * might arise. 4386 */ 4387 if (fvp == tdvp) { 4388 error = EINVAL; 4389 goto abort3; 4390 } 4391 4392 /* 4393 * Acknowledge that POSIX has a wacky screw case. 4394 * 4395 * XXX Eventually the retain flag needs to be passed on to 4396 * VOP_RENAME. 4397 */ 4398 if (fvp == tvp) { 4399 if (retain) { 4400 error = 0; 4401 goto abort3; 4402 } else if ((fdvp == tdvp) && 4403 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4404 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4405 fnd.ni_cnd.cn_namelen))) { 4406 error = 0; 4407 goto abort3; 4408 } 4409 } 4410 4411 /* 4412 * Make sure veriexec can screw us up. 
(But a race can screw 4413 * up veriexec, of course -- remember, fvp and (soon) tvp are 4414 * bogus.) 4415 */ 4416 #if NVERIEXEC > 0 4417 { 4418 char *f1, *f2; 4419 size_t f1_len; 4420 size_t f2_len; 4421 4422 f1_len = fnd.ni_cnd.cn_namelen + 1; 4423 f1 = kmem_alloc(f1_len, KM_SLEEP); 4424 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4425 4426 f2_len = tnd.ni_cnd.cn_namelen + 1; 4427 f2 = kmem_alloc(f2_len, KM_SLEEP); 4428 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4429 4430 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4431 4432 kmem_free(f1, f1_len); 4433 kmem_free(f2, f2_len); 4434 4435 if (error) 4436 goto abort3; 4437 } 4438 #endif /* NVERIEXEC > 0 */ 4439 4440 /* 4441 * All ready. Incant the rename vop. 4442 */ 4443 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4444 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4445 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4446 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4447 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4448 4449 /* 4450 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4451 * tdvp and tvp. But we can't assert any of that. 4452 */ 4453 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4454 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4455 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4456 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4457 4458 /* 4459 * So all we have left to do is to drop the rename lock and 4460 * destroy the pathbufs. 
4461 */ 4462 VFS_RENAMELOCK_EXIT(mp); 4463 goto out2; 4464 4465 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4466 VOP_UNLOCK(tvp); 4467 abort2: VOP_UNLOCK(tdvp); 4468 VFS_RENAMELOCK_EXIT(mp); 4469 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4470 vrele(tdvp); 4471 if (tvp != NULL) 4472 vrele(tvp); 4473 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4474 vrele(fdvp); 4475 vrele(fvp); 4476 out2: pathbuf_destroy(tpb); 4477 out1: pathbuf_destroy(fpb); 4478 out0: return error; 4479 } 4480 4481 /* 4482 * Make a directory file. 4483 */ 4484 /* ARGSUSED */ 4485 int 4486 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4487 { 4488 /* { 4489 syscallarg(const char *) path; 4490 syscallarg(int) mode; 4491 } */ 4492 4493 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4494 SCARG(uap, mode), UIO_USERSPACE); 4495 } 4496 4497 int 4498 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4499 register_t *retval) 4500 { 4501 /* { 4502 syscallarg(int) fd; 4503 syscallarg(const char *) path; 4504 syscallarg(int) mode; 4505 } */ 4506 4507 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4508 SCARG(uap, mode), UIO_USERSPACE); 4509 } 4510 4511 4512 int 4513 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4514 { 4515 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4516 } 4517 4518 static int 4519 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4520 enum uio_seg seg) 4521 { 4522 struct proc *p = curlwp->l_proc; 4523 struct vnode *vp; 4524 struct vattr vattr; 4525 int error; 4526 struct pathbuf *pb; 4527 struct nameidata nd; 4528 4529 KASSERT(l != NULL || fdat == AT_FDCWD); 4530 4531 /* XXX bollocks, should pass in a pathbuf */ 4532 error = pathbuf_maybe_copyin(path, seg, &pb); 4533 if (error) { 4534 return error; 4535 } 4536 4537 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4538 4539 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4540 pathbuf_destroy(pb); 4541 return (error); 
4542 } 4543 vp = nd.ni_vp; 4544 if (vp != NULL) { 4545 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4546 if (nd.ni_dvp == vp) 4547 vrele(nd.ni_dvp); 4548 else 4549 vput(nd.ni_dvp); 4550 vrele(vp); 4551 pathbuf_destroy(pb); 4552 return (EEXIST); 4553 } 4554 vattr_null(&vattr); 4555 vattr.va_type = VDIR; 4556 /* We will read cwdi->cwdi_cmask unlocked. */ 4557 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4558 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4559 if (!error) 4560 vput(nd.ni_vp); 4561 pathbuf_destroy(pb); 4562 return (error); 4563 } 4564 4565 /* 4566 * Remove a directory file. 4567 */ 4568 /* ARGSUSED */ 4569 int 4570 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4571 { 4572 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4573 AT_REMOVEDIR, UIO_USERSPACE); 4574 } 4575 4576 /* 4577 * Read a block of directory entries in a file system independent format. 4578 */ 4579 int 4580 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4581 { 4582 /* { 4583 syscallarg(int) fd; 4584 syscallarg(char *) buf; 4585 syscallarg(size_t) count; 4586 } */ 4587 file_t *fp; 4588 int error, done; 4589 4590 /* fd_getvnode() will use the descriptor for us */ 4591 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4592 return (error); 4593 if ((fp->f_flag & FREAD) == 0) { 4594 error = EBADF; 4595 goto out; 4596 } 4597 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4598 SCARG(uap, count), &done, l, 0, 0); 4599 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4600 *retval = done; 4601 out: 4602 fd_putfile(SCARG(uap, fd)); 4603 return (error); 4604 } 4605 4606 /* 4607 * Set the mode mask for creation of filesystem nodes. 
4608 */ 4609 int 4610 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4611 { 4612 /* { 4613 syscallarg(mode_t) newmask; 4614 } */ 4615 struct proc *p = l->l_proc; 4616 struct cwdinfo *cwdi; 4617 4618 /* 4619 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4620 * important is that we serialize changes to the mask. The 4621 * rw_exit() will issue a write memory barrier on our behalf, 4622 * and force the changes out to other CPUs (as it must use an 4623 * atomic operation, draining the local CPU's store buffers). 4624 */ 4625 cwdi = p->p_cwdi; 4626 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4627 *retval = cwdi->cwdi_cmask; 4628 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4629 rw_exit(&cwdi->cwdi_lock); 4630 4631 return (0); 4632 } 4633 4634 int 4635 dorevoke(struct vnode *vp, kauth_cred_t cred) 4636 { 4637 struct vattr vattr; 4638 int error, fs_decision; 4639 4640 vn_lock(vp, LK_SHARED | LK_RETRY); 4641 error = VOP_GETATTR(vp, &vattr, cred); 4642 VOP_UNLOCK(vp); 4643 if (error != 0) 4644 return error; 4645 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4646 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4647 fs_decision); 4648 if (!error) 4649 VOP_REVOKE(vp, REVOKEALL); 4650 return (error); 4651 } 4652 4653 /* 4654 * Void all references to file by ripping underlying filesystem 4655 * away from vnode. 4656 */ 4657 /* ARGSUSED */ 4658 int 4659 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4660 { 4661 /* { 4662 syscallarg(const char *) path; 4663 } */ 4664 struct vnode *vp; 4665 int error; 4666 4667 error = namei_simple_user(SCARG(uap, path), 4668 NSM_FOLLOW_TRYEMULROOT, &vp); 4669 if (error != 0) 4670 return (error); 4671 error = dorevoke(vp, l->l_cred); 4672 vrele(vp); 4673 return (error); 4674 } 4675