1 /* $NetBSD: vfs_syscalls.c,v 1.476 2014/02/15 22:32:16 njoly Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.476 2014/02/15 22:32:16 njoly Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 static int change_flags(struct vnode *, u_long, struct lwp *); 120 static int change_mode(struct vnode *, int, struct lwp *); 121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 123 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 124 enum uio_seg); 125 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 126 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 127 enum uio_seg); 128 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 129 enum uio_seg, int); 130 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 131 size_t, register_t *); 132 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 133 134 static int fd_nameiat(struct lwp *, int, struct nameidata *); 135 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 136 namei_simple_flags_t, struct vnode **); 137 138 139 /* 140 * This table is used to maintain compatibility with 4.3BSD 141 * and NetBSD 0.9 mount syscalls - and possibly other systems. 142 * Note, the order is important! 143 * 144 * Do not modify this table. It should only contain filesystems 145 * supported by NetBSD 0.9 and 4.3BSD. 146 */ 147 const char * const mountcompatnames[] = { 148 NULL, /* 0 = MOUNT_NONE */ 149 MOUNT_FFS, /* 1 = MOUNT_UFS */ 150 MOUNT_NFS, /* 2 */ 151 MOUNT_MFS, /* 3 */ 152 MOUNT_MSDOS, /* 4 */ 153 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 154 MOUNT_FDESC, /* 6 */ 155 MOUNT_KERNFS, /* 7 */ 156 NULL, /* 8 = MOUNT_DEVFS */ 157 MOUNT_AFS, /* 9 */ 158 }; 159 160 const int nmountcompatnames = __arraycount(mountcompatnames); 161 162 static int 163 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 164 { 165 file_t *dfp; 166 int error; 167 168 if (fdat != AT_FDCWD) { 169 if ((error = fd_getvnode(fdat, &dfp)) != 0) 170 goto out; 171 172 NDAT(ndp, dfp->f_data); 173 } 174 175 error = namei(ndp); 176 177 if (fdat != AT_FDCWD) 178 fd_putfile(fdat); 179 out: 180 return error; 181 } 182 183 static int 184 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 185 namei_simple_flags_t sflags, struct vnode **vp_ret) 186 { 187 file_t *dfp; 188 struct vnode *dvp; 189 int error; 190 191 if (fdat != AT_FDCWD) { 192 if ((error = fd_getvnode(fdat, &dfp)) != 0) 193 goto out; 194 195 dvp = dfp->f_data; 196 } else { 197 dvp = NULL; 198 } 199 200 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 201 202 if (fdat != AT_FDCWD) 203 fd_putfile(fdat); 204 out: 205 return error; 206 } 207 208 static int 209 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 210 { 211 int error; 212 213 fp->f_flag = flags & FMASK; 214 fp->f_type = DTYPE_VNODE; 215 fp->f_ops = &vnops; 216 fp->f_data = vp; 217 218 if (flags & (O_EXLOCK | O_SHLOCK)) { 219 struct flock lf; 220 int type; 221 222 lf.l_whence = SEEK_SET; 223 lf.l_start = 0; 224 lf.l_len = 0; 225 if (flags & O_EXLOCK) 226 lf.l_type = F_WRLCK; 227 else 228 lf.l_type = F_RDLCK; 229 type = F_FLOCK; 230 if ((flags & FNONBLOCK) == 0) 231 type |= F_WAIT; 232 VOP_UNLOCK(vp); 233 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 234 if (error) { 235 (void) vn_close(vp, fp->f_flag, fp->f_cred); 236 fd_abort(l->l_proc, fp, indx); 237 return error; 238 } 239 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 240 atomic_or_uint(&fp->f_flag, FHASLOCK); 241 } 242 if (flags & O_CLOEXEC) 243 fd_set_exclose(l, indx, true); 244 return 0; 245 } 246 247 static int 248 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 249 void *data, size_t *data_len) 250 { 251 struct mount *mp; 252 int error = 0, saved_flags; 253 254 mp = vp->v_mount; 255 saved_flags = mp->mnt_flag; 256 257 /* We can operate only on VV_ROOT nodes. */ 258 if ((vp->v_vflag & VV_ROOT) == 0) { 259 error = EINVAL; 260 goto out; 261 } 262 263 /* 264 * We only allow the filesystem to be reloaded if it 265 * is currently mounted read-only. Additionally, we 266 * prevent read-write to read-only downgrades. 267 */ 268 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 269 (mp->mnt_flag & MNT_RDONLY) == 0 && 270 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 271 error = EOPNOTSUPP; /* Needs translation */ 272 goto out; 273 } 274 275 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 276 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 277 if (error) 278 goto out; 279 280 if (vfs_busy(mp, NULL)) { 281 error = EPERM; 282 goto out; 283 } 284 285 mutex_enter(&mp->mnt_updating); 286 287 mp->mnt_flag &= ~MNT_OP_FLAGS; 288 mp->mnt_flag |= flags & MNT_OP_FLAGS; 289 290 /* 291 * Set the mount level flags. 292 */ 293 if (flags & MNT_RDONLY) 294 mp->mnt_flag |= MNT_RDONLY; 295 else if (mp->mnt_flag & MNT_RDONLY) 296 mp->mnt_iflag |= IMNT_WANTRDWR; 297 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 298 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 299 error = VFS_MOUNT(mp, path, data, data_len); 300 301 if (error && data != NULL) { 302 int error2; 303 304 /* 305 * Update failed; let's try and see if it was an 306 * export request. For compat with 3.0 and earlier. 307 */ 308 error2 = vfs_hooks_reexport(mp, path, data); 309 310 /* 311 * Only update error code if the export request was 312 * understood but some problem occurred while 313 * processing it. 314 */ 315 if (error2 != EJUSTRETURN) 316 error = error2; 317 } 318 319 if (mp->mnt_iflag & IMNT_WANTRDWR) 320 mp->mnt_flag &= ~MNT_RDONLY; 321 if (error) 322 mp->mnt_flag = saved_flags; 323 mp->mnt_flag &= ~MNT_OP_FLAGS; 324 mp->mnt_iflag &= ~IMNT_WANTRDWR; 325 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 326 if (mp->mnt_syncer == NULL) 327 error = vfs_allocate_syncvnode(mp); 328 } else { 329 if (mp->mnt_syncer != NULL) 330 vfs_deallocate_syncvnode(mp); 331 } 332 mutex_exit(&mp->mnt_updating); 333 vfs_unbusy(mp, false, NULL); 334 335 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 336 (flags & MNT_EXTATTR)) { 337 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 338 NULL, 0, NULL) != 0) { 339 printf("%s: failed to start extattr, error = %d", 340 mp->mnt_stat.f_mntonname, error); 341 mp->mnt_flag &= ~MNT_EXTATTR; 342 } 343 } 344 345 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 346 !(flags & MNT_EXTATTR)) { 347 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 348 NULL, 0, NULL) != 0) { 349 printf("%s: failed to stop extattr, error = %d", 350 mp->mnt_stat.f_mntonname, error); 351 mp->mnt_flag |= MNT_RDONLY; 352 } 353 } 354 out: 355 return (error); 356 } 357 358 static int 359 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 360 { 361 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 362 int error; 363 364 /* Copy file-system type from userspace. */ 365 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 366 if (error) { 367 /* 368 * Historically, filesystem types were identified by numbers. 369 * If we get an integer for the filesystem type instead of a 370 * string, we check to see if it matches one of the historic 371 * filesystem types. 372 */ 373 u_long fsindex = (u_long)fstype; 374 if (fsindex >= nmountcompatnames || 375 mountcompatnames[fsindex] == NULL) 376 return ENODEV; 377 strlcpy(fstypename, mountcompatnames[fsindex], 378 sizeof(fstypename)); 379 } 380 381 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 382 if (strcmp(fstypename, "ufs") == 0) 383 fstypename[0] = 'f'; 384 385 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 386 return 0; 387 388 /* If we can autoload a vfs module, try again */ 389 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 390 391 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 392 return 0; 393 394 return ENODEV; 395 } 396 397 static int 398 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 399 void *data, size_t *data_len) 400 { 401 struct mount *mp; 402 int error; 403 404 /* If MNT_GETARGS is specified, it should be the only flag. */ 405 if (flags & ~MNT_GETARGS) 406 return EINVAL; 407 408 mp = vp->v_mount; 409 410 /* XXX: probably some notion of "can see" here if we want isolation. */ 411 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 412 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 413 if (error) 414 return error; 415 416 if ((vp->v_vflag & VV_ROOT) == 0) 417 return EINVAL; 418 419 if (vfs_busy(mp, NULL)) 420 return EPERM; 421 422 mutex_enter(&mp->mnt_updating); 423 mp->mnt_flag &= ~MNT_OP_FLAGS; 424 mp->mnt_flag |= MNT_GETARGS; 425 error = VFS_MOUNT(mp, path, data, data_len); 426 mp->mnt_flag &= ~MNT_OP_FLAGS; 427 mutex_exit(&mp->mnt_updating); 428 429 vfs_unbusy(mp, false, NULL); 430 return (error); 431 } 432 433 int 434 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 435 { 436 /* { 437 syscallarg(const char *) type; 438 syscallarg(const char *) path; 439 syscallarg(int) flags; 440 syscallarg(void *) data; 441 syscallarg(size_t) data_len; 442 } */ 443 444 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 445 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 446 SCARG(uap, data_len), retval); 447 } 448 449 int 450 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 451 const char *path, int flags, void *data, enum uio_seg data_seg, 452 size_t data_len, register_t *retval) 453 { 454 struct vnode *vp; 455 void *data_buf = data; 456 bool vfsopsrele = false; 457 int error; 458 459 /* XXX: The calling convention of this routine is totally bizarre */ 460 if (vfsops) 461 vfsopsrele = true; 462 463 /* 464 * Get vnode to be covered 465 */ 466 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 467 if (error != 0) { 468 vp = NULL; 469 goto done; 470 } 471 472 if (vfsops == NULL) { 473 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 474 vfsops = vp->v_mount->mnt_op; 475 } else { 476 /* 'type' is userspace */ 477 error = mount_get_vfsops(type, &vfsops); 478 if (error != 0) 479 goto done; 480 vfsopsrele = true; 481 } 482 } 483 484 if (data != NULL && data_seg == UIO_USERSPACE) { 485 if (data_len == 0) { 486 /* No length supplied, use default for filesystem */ 487 data_len = vfsops->vfs_min_mount_data; 488 if (data_len > VFS_MAX_MOUNT_DATA) { 489 error = EINVAL; 490 goto done; 491 } 492 /* 493 * Hopefully a longer buffer won't make copyin() fail. 494 * For compatibility with 3.0 and earlier. 495 */ 496 if (flags & MNT_UPDATE 497 && data_len < sizeof (struct mnt_export_args30)) 498 data_len = sizeof (struct mnt_export_args30); 499 } 500 data_buf = kmem_alloc(data_len, KM_SLEEP); 501 502 /* NFS needs the buffer even for mnt_getargs .... */ 503 error = copyin(data, data_buf, data_len); 504 if (error != 0) 505 goto done; 506 } 507 508 if (flags & MNT_GETARGS) { 509 if (data_len == 0) { 510 error = EINVAL; 511 goto done; 512 } 513 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 514 if (error != 0) 515 goto done; 516 if (data_seg == UIO_USERSPACE) 517 error = copyout(data_buf, data, data_len); 518 *retval = data_len; 519 } else if (flags & MNT_UPDATE) { 520 error = mount_update(l, vp, path, flags, data_buf, &data_len); 521 } else { 522 /* Locking is handled internally in mount_domount(). */ 523 KASSERT(vfsopsrele == true); 524 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 525 &data_len); 526 vfsopsrele = false; 527 } 528 529 done: 530 if (vfsopsrele) 531 vfs_delref(vfsops); 532 if (vp != NULL) { 533 vrele(vp); 534 } 535 if (data_buf != data) 536 kmem_free(data_buf, data_len); 537 return (error); 538 } 539 540 /* 541 * Unmount a file system. 542 * 543 * Note: unmount takes a path to the vnode mounted on as argument, 544 * not special file (as before). 545 */ 546 /* ARGSUSED */ 547 int 548 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 549 { 550 /* { 551 syscallarg(const char *) path; 552 syscallarg(int) flags; 553 } */ 554 struct vnode *vp; 555 struct mount *mp; 556 int error; 557 struct pathbuf *pb; 558 struct nameidata nd; 559 560 error = pathbuf_copyin(SCARG(uap, path), &pb); 561 if (error) { 562 return error; 563 } 564 565 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 566 if ((error = namei(&nd)) != 0) { 567 pathbuf_destroy(pb); 568 return error; 569 } 570 vp = nd.ni_vp; 571 pathbuf_destroy(pb); 572 573 mp = vp->v_mount; 574 atomic_inc_uint(&mp->mnt_refcnt); 575 VOP_UNLOCK(vp); 576 577 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 578 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 579 if (error) { 580 vrele(vp); 581 vfs_destroy(mp); 582 return (error); 583 } 584 585 /* 586 * Don't allow unmounting the root file system. 587 */ 588 if (mp->mnt_flag & MNT_ROOTFS) { 589 vrele(vp); 590 vfs_destroy(mp); 591 return (EINVAL); 592 } 593 594 /* 595 * Must be the root of the filesystem 596 */ 597 if ((vp->v_vflag & VV_ROOT) == 0) { 598 vrele(vp); 599 vfs_destroy(mp); 600 return (EINVAL); 601 } 602 603 vrele(vp); 604 error = dounmount(mp, SCARG(uap, flags), l); 605 vfs_destroy(mp); 606 return error; 607 } 608 609 /* 610 * Sync each mounted filesystem. 611 */ 612 #ifdef DEBUG 613 int syncprt = 0; 614 struct ctldebug debug0 = { "syncprt", &syncprt }; 615 #endif 616 617 void 618 do_sys_sync(struct lwp *l) 619 { 620 struct mount *mp, *nmp; 621 int asyncflag; 622 623 mutex_enter(&mountlist_lock); 624 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 625 if (vfs_busy(mp, &nmp)) { 626 continue; 627 } 628 mutex_enter(&mp->mnt_updating); 629 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 630 asyncflag = mp->mnt_flag & MNT_ASYNC; 631 mp->mnt_flag &= ~MNT_ASYNC; 632 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 633 if (asyncflag) 634 mp->mnt_flag |= MNT_ASYNC; 635 } 636 mutex_exit(&mp->mnt_updating); 637 vfs_unbusy(mp, false, &nmp); 638 } 639 mutex_exit(&mountlist_lock); 640 #ifdef DEBUG 641 if (syncprt) 642 vfs_bufstats(); 643 #endif /* DEBUG */ 644 } 645 646 /* ARGSUSED */ 647 int 648 sys_sync(struct lwp *l, const void *v, register_t *retval) 649 { 650 do_sys_sync(l); 651 return (0); 652 } 653 654 655 /* 656 * Access or change filesystem quotas. 657 * 658 * (this is really 14 different calls bundled into one) 659 */ 660 661 static int 662 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 663 { 664 struct quotastat info_k; 665 int error; 666 667 /* ensure any padding bytes are cleared */ 668 memset(&info_k, 0, sizeof(info_k)); 669 670 error = vfs_quotactl_stat(mp, &info_k); 671 if (error) { 672 return error; 673 } 674 675 return copyout(&info_k, info_u, sizeof(info_k)); 676 } 677 678 static int 679 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 680 struct quotaidtypestat *info_u) 681 { 682 struct quotaidtypestat info_k; 683 int error; 684 685 /* ensure any padding bytes are cleared */ 686 memset(&info_k, 0, sizeof(info_k)); 687 688 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 689 if (error) { 690 return error; 691 } 692 693 return copyout(&info_k, info_u, sizeof(info_k)); 694 } 695 696 static int 697 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 698 struct quotaobjtypestat *info_u) 699 { 700 struct quotaobjtypestat info_k; 701 int error; 702 703 /* ensure any padding bytes are cleared */ 704 memset(&info_k, 0, sizeof(info_k)); 705 706 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 707 if (error) { 708 return error; 709 } 710 711 return copyout(&info_k, info_u, sizeof(info_k)); 712 } 713 714 static int 715 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 716 struct quotaval *val_u) 717 { 718 struct quotakey key_k; 719 struct quotaval val_k; 720 int error; 721 722 /* ensure any padding bytes are cleared */ 723 memset(&val_k, 0, sizeof(val_k)); 724 725 error = copyin(key_u, &key_k, sizeof(key_k)); 726 if (error) { 727 return error; 728 } 729 730 error = vfs_quotactl_get(mp, &key_k, &val_k); 731 if (error) { 732 return error; 733 } 734 735 return copyout(&val_k, val_u, sizeof(val_k)); 736 } 737 738 static int 739 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 740 const struct quotaval *val_u) 741 { 742 struct quotakey key_k; 743 struct quotaval val_k; 744 int error; 745 746 error = copyin(key_u, &key_k, sizeof(key_k)); 747 if (error) { 748 return error; 749 } 750 751 error = copyin(val_u, &val_k, sizeof(val_k)); 752 if (error) { 753 return error; 754 } 755 756 return vfs_quotactl_put(mp, &key_k, &val_k); 757 } 758 759 static int 760 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u) 761 { 762 struct quotakey key_k; 763 int error; 764 765 error = copyin(key_u, &key_k, sizeof(key_k)); 766 if (error) { 767 return error; 768 } 769 770 return vfs_quotactl_delete(mp, &key_k); 771 } 772 773 static int 774 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 775 { 776 struct quotakcursor cursor_k; 777 int error; 778 779 /* ensure any padding bytes are cleared */ 780 memset(&cursor_k, 0, sizeof(cursor_k)); 781 782 error = vfs_quotactl_cursoropen(mp, &cursor_k); 783 if (error) { 784 return error; 785 } 786 787 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 788 } 789 790 static int 791 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 792 { 793 struct quotakcursor cursor_k; 794 int error; 795 796 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 797 if (error) { 798 return error; 799 } 800 801 return vfs_quotactl_cursorclose(mp, &cursor_k); 802 } 803 804 static int 805 do_sys_quotactl_cursorskipidtype(struct mount *mp, 806 struct quotakcursor *cursor_u, int idtype) 807 { 808 struct quotakcursor cursor_k; 809 int error; 810 811 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 812 if (error) { 813 return error; 814 } 815 816 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 817 if (error) { 818 return error; 819 } 820 821 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 822 } 823 824 static int 825 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 826 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 827 unsigned *ret_u) 828 { 829 #define CGET_STACK_MAX 8 830 struct quotakcursor cursor_k; 831 struct quotakey stackkeys[CGET_STACK_MAX]; 832 struct quotaval stackvals[CGET_STACK_MAX]; 833 struct quotakey *keys_k; 834 struct quotaval *vals_k; 835 unsigned ret_k; 836 int error; 837 838 if (maxnum > 128) { 839 maxnum = 128; 840 } 841 842 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 843 if (error) { 844 return error; 845 } 846 847 if (maxnum <= CGET_STACK_MAX) { 848 keys_k = stackkeys; 849 vals_k = stackvals; 850 /* ensure any padding bytes are cleared */ 851 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 852 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 853 } else { 854 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 855 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 856 } 857 858 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 859 &ret_k); 860 if (error) { 861 goto fail; 862 } 863 864 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 865 if (error) { 866 goto fail; 867 } 868 869 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 870 if (error) { 871 goto fail; 872 } 873 874 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 875 if (error) { 876 goto fail; 877 } 878 879 /* do last to maximize the chance of being able to recover a failure */ 880 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 881 882 fail: 883 if (keys_k != stackkeys) { 884 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 885 } 886 if (vals_k != stackvals) { 887 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 888 } 889 return error; 890 } 891 892 static int 893 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 894 int *ret_u) 895 { 896 struct quotakcursor cursor_k; 897 int ret_k; 898 int error; 899 900 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 901 if (error) { 902 return error; 903 } 904 905 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 906 if (error) { 907 return error; 908 } 909 910 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 911 if (error) { 912 return error; 913 } 914 915 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 916 } 917 918 static int 919 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 920 { 921 struct quotakcursor cursor_k; 922 int error; 923 924 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 925 if (error) { 926 return error; 927 } 928 929 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 930 if (error) { 931 return error; 932 } 933 934 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 935 } 936 937 static int 938 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 939 { 940 char *path_k; 941 int error; 942 943 /* XXX this should probably be a struct pathbuf */ 944 path_k = PNBUF_GET(); 945 error = copyin(path_u, path_k, PATH_MAX); 946 if (error) { 947 PNBUF_PUT(path_k); 948 return error; 949 } 950 951 error = vfs_quotactl_quotaon(mp, idtype, path_k); 952 953 PNBUF_PUT(path_k); 954 return error; 955 } 956 957 static int 958 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 959 { 960 return vfs_quotactl_quotaoff(mp, idtype); 961 } 962 963 int 964 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 965 { 966 struct mount *mp; 967 struct vnode *vp; 968 int error; 969 970 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 971 if (error != 0) 972 return (error); 973 mp = vp->v_mount; 974 975 switch (args->qc_op) { 976 case QUOTACTL_STAT: 977 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 978 break; 979 case QUOTACTL_IDTYPESTAT: 980 error = do_sys_quotactl_idtypestat(mp, 981 args->u.idtypestat.qc_idtype, 982 args->u.idtypestat.qc_info); 983 break; 984 case QUOTACTL_OBJTYPESTAT: 985 error = do_sys_quotactl_objtypestat(mp, 986 args->u.objtypestat.qc_objtype, 987 args->u.objtypestat.qc_info); 988 break; 989 case QUOTACTL_GET: 990 error = do_sys_quotactl_get(mp, 991 args->u.get.qc_key, 992 args->u.get.qc_val); 993 break; 994 case QUOTACTL_PUT: 995 error = do_sys_quotactl_put(mp, 996 args->u.put.qc_key, 997 args->u.put.qc_val); 998 break; 999 case QUOTACTL_DELETE: 1000 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key); 1001 break; 1002 case QUOTACTL_CURSOROPEN: 1003 error = do_sys_quotactl_cursoropen(mp, 1004 args->u.cursoropen.qc_cursor); 1005 break; 1006 case QUOTACTL_CURSORCLOSE: 1007 error = do_sys_quotactl_cursorclose(mp, 1008 args->u.cursorclose.qc_cursor); 1009 break; 1010 case QUOTACTL_CURSORSKIPIDTYPE: 1011 error = do_sys_quotactl_cursorskipidtype(mp, 1012 args->u.cursorskipidtype.qc_cursor, 1013 args->u.cursorskipidtype.qc_idtype); 1014 break; 1015 case QUOTACTL_CURSORGET: 1016 error = do_sys_quotactl_cursorget(mp, 1017 args->u.cursorget.qc_cursor, 1018 args->u.cursorget.qc_keys, 1019 args->u.cursorget.qc_vals, 1020 args->u.cursorget.qc_maxnum, 1021 args->u.cursorget.qc_ret); 1022 break; 1023 case QUOTACTL_CURSORATEND: 1024 error = do_sys_quotactl_cursoratend(mp, 1025 args->u.cursoratend.qc_cursor, 1026 args->u.cursoratend.qc_ret); 1027 break; 1028 case QUOTACTL_CURSORREWIND: 1029 error = do_sys_quotactl_cursorrewind(mp, 1030 args->u.cursorrewind.qc_cursor); 1031 break; 1032 case QUOTACTL_QUOTAON: 1033 error = do_sys_quotactl_quotaon(mp, 1034 args->u.quotaon.qc_idtype, 1035 args->u.quotaon.qc_quotafile); 1036 break; 1037 case QUOTACTL_QUOTAOFF: 1038 error = do_sys_quotactl_quotaoff(mp, 1039 args->u.quotaoff.qc_idtype); 1040 break; 1041 default: 1042 error = EINVAL; 1043 break; 1044 } 1045 1046 vrele(vp); 1047 return error; 1048 } 1049 1050 /* ARGSUSED */ 1051 int 1052 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1053 register_t *retval) 1054 { 1055 /* { 1056 syscallarg(const char *) path; 1057 syscallarg(struct quotactl_args *) args; 1058 } */ 1059 struct quotactl_args args; 1060 int error; 1061 1062 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1063 if (error) { 1064 return error; 1065 } 1066 1067 return do_sys_quotactl(SCARG(uap, path), &args); 1068 } 1069 1070 int 1071 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1072 int root) 1073 { 1074 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1075 int error = 0; 1076 1077 /* 1078 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1079 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1080 * overrides MNT_NOWAIT. 1081 */ 1082 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1083 (flags != MNT_WAIT && flags != 0)) { 1084 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1085 goto done; 1086 } 1087 1088 /* Get the filesystem stats now */ 1089 memset(sp, 0, sizeof(*sp)); 1090 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1091 return error; 1092 } 1093 1094 if (cwdi->cwdi_rdir == NULL) 1095 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1096 done: 1097 if (cwdi->cwdi_rdir != NULL) { 1098 size_t len; 1099 char *bp; 1100 char c; 1101 char *path = PNBUF_GET(); 1102 1103 bp = path + MAXPATHLEN; 1104 *--bp = '\0'; 1105 rw_enter(&cwdi->cwdi_lock, RW_READER); 1106 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1107 MAXPATHLEN / 2, 0, l); 1108 rw_exit(&cwdi->cwdi_lock); 1109 if (error) { 1110 PNBUF_PUT(path); 1111 return error; 1112 } 1113 len = strlen(bp); 1114 if (len != 1) { 1115 /* 1116 * for mount points that are below our root, we can see 1117 * them, so we fix up the pathname and return them. The 1118 * rest we cannot see, so we don't allow viewing the 1119 * data. 1120 */ 1121 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1122 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1123 (void)strlcpy(sp->f_mntonname, 1124 c == '\0' ? "/" : &sp->f_mntonname[len], 1125 sizeof(sp->f_mntonname)); 1126 } else { 1127 if (root) 1128 (void)strlcpy(sp->f_mntonname, "/", 1129 sizeof(sp->f_mntonname)); 1130 else 1131 error = EPERM; 1132 } 1133 } 1134 PNBUF_PUT(path); 1135 } 1136 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1137 return error; 1138 } 1139 1140 /* 1141 * Get filesystem statistics by path. 1142 */ 1143 int 1144 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1145 { 1146 struct mount *mp; 1147 int error; 1148 struct vnode *vp; 1149 1150 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1151 if (error != 0) 1152 return error; 1153 mp = vp->v_mount; 1154 error = dostatvfs(mp, sb, l, flags, 1); 1155 vrele(vp); 1156 return error; 1157 } 1158 1159 /* ARGSUSED */ 1160 int 1161 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1162 { 1163 /* { 1164 syscallarg(const char *) path; 1165 syscallarg(struct statvfs *) buf; 1166 syscallarg(int) flags; 1167 } */ 1168 struct statvfs *sb; 1169 int error; 1170 1171 sb = STATVFSBUF_GET(); 1172 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1173 if (error == 0) 1174 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1175 STATVFSBUF_PUT(sb); 1176 return error; 1177 } 1178 1179 /* 1180 * Get filesystem statistics by fd. 1181 */ 1182 int 1183 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1184 { 1185 file_t *fp; 1186 struct mount *mp; 1187 int error; 1188 1189 /* fd_getvnode() will use the descriptor for us */ 1190 if ((error = fd_getvnode(fd, &fp)) != 0) 1191 return (error); 1192 mp = ((struct vnode *)fp->f_data)->v_mount; 1193 error = dostatvfs(mp, sb, curlwp, flags, 1); 1194 fd_putfile(fd); 1195 return error; 1196 } 1197 1198 /* ARGSUSED */ 1199 int 1200 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1201 { 1202 /* { 1203 syscallarg(int) fd; 1204 syscallarg(struct statvfs *) buf; 1205 syscallarg(int) flags; 1206 } */ 1207 struct statvfs *sb; 1208 int error; 1209 1210 sb = STATVFSBUF_GET(); 1211 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1212 if (error == 0) 1213 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1214 STATVFSBUF_PUT(sb); 1215 return error; 1216 } 1217 1218 1219 /* 1220 * Get statistics on all filesystems. 1221 */ 1222 int 1223 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1224 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1225 register_t *retval) 1226 { 1227 int root = 0; 1228 struct proc *p = l->l_proc; 1229 struct mount *mp, *nmp; 1230 struct statvfs *sb; 1231 size_t count, maxcount; 1232 int error = 0; 1233 1234 sb = STATVFSBUF_GET(); 1235 maxcount = bufsize / entry_sz; 1236 mutex_enter(&mountlist_lock); 1237 count = 0; 1238 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1239 if (vfs_busy(mp, &nmp)) { 1240 continue; 1241 } 1242 if (sfsp && count < maxcount) { 1243 error = dostatvfs(mp, sb, l, flags, 0); 1244 if (error) { 1245 vfs_unbusy(mp, false, &nmp); 1246 error = 0; 1247 continue; 1248 } 1249 error = copyfn(sb, sfsp, entry_sz); 1250 if (error) { 1251 vfs_unbusy(mp, false, NULL); 1252 goto out; 1253 } 1254 sfsp = (char *)sfsp + entry_sz; 1255 root |= strcmp(sb->f_mntonname, "/") == 0; 1256 } 1257 count++; 1258 vfs_unbusy(mp, false, &nmp); 1259 } 1260 mutex_exit(&mountlist_lock); 1261 1262 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1263 /* 1264 * fake a root entry 1265 */ 1266 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1267 sb, l, flags, 1); 1268 if (error != 0) 1269 goto out; 1270 if (sfsp) { 1271 error = copyfn(sb, sfsp, entry_sz); 1272 if (error != 0) 1273 goto out; 1274 } 1275 count++; 1276 } 1277 if (sfsp && count > maxcount) 1278 *retval = maxcount; 1279 else 1280 *retval = count; 1281 out: 1282 STATVFSBUF_PUT(sb); 1283 return error; 1284 } 1285 1286 int 1287 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1288 { 1289 /* { 1290 syscallarg(struct statvfs *) buf; 1291 syscallarg(size_t) bufsize; 1292 syscallarg(int) flags; 1293 } */ 1294 1295 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1296 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1297 } 1298 1299 /* 1300 * Change current working directory to a given file descriptor. 1301 */ 1302 /* ARGSUSED */ 1303 int 1304 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1305 { 1306 /* { 1307 syscallarg(int) fd; 1308 } */ 1309 struct proc *p = l->l_proc; 1310 struct cwdinfo *cwdi; 1311 struct vnode *vp, *tdp; 1312 struct mount *mp; 1313 file_t *fp; 1314 int error, fd; 1315 1316 /* fd_getvnode() will use the descriptor for us */ 1317 fd = SCARG(uap, fd); 1318 if ((error = fd_getvnode(fd, &fp)) != 0) 1319 return (error); 1320 vp = fp->f_data; 1321 1322 vref(vp); 1323 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1324 if (vp->v_type != VDIR) 1325 error = ENOTDIR; 1326 else 1327 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1328 if (error) { 1329 vput(vp); 1330 goto out; 1331 } 1332 while ((mp = vp->v_mountedhere) != NULL) { 1333 error = vfs_busy(mp, NULL); 1334 vput(vp); 1335 if (error != 0) 1336 goto out; 1337 error = VFS_ROOT(mp, &tdp); 1338 vfs_unbusy(mp, false, NULL); 1339 if (error) 1340 goto out; 1341 vp = tdp; 1342 } 1343 VOP_UNLOCK(vp); 1344 1345 /* 1346 * Disallow changing to a directory not under the process's 1347 * current root directory (if there is one). 1348 */ 1349 cwdi = p->p_cwdi; 1350 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1351 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1352 vrele(vp); 1353 error = EPERM; /* operation not permitted */ 1354 } else { 1355 vrele(cwdi->cwdi_cdir); 1356 cwdi->cwdi_cdir = vp; 1357 } 1358 rw_exit(&cwdi->cwdi_lock); 1359 1360 out: 1361 fd_putfile(fd); 1362 return (error); 1363 } 1364 1365 /* 1366 * Change this process's notion of the root directory to a given file 1367 * descriptor. 1368 */ 1369 int 1370 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1371 { 1372 struct proc *p = l->l_proc; 1373 struct vnode *vp; 1374 file_t *fp; 1375 int error, fd = SCARG(uap, fd); 1376 1377 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1378 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1379 return error; 1380 /* fd_getvnode() will use the descriptor for us */ 1381 if ((error = fd_getvnode(fd, &fp)) != 0) 1382 return error; 1383 vp = fp->f_data; 1384 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1385 if (vp->v_type != VDIR) 1386 error = ENOTDIR; 1387 else 1388 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1389 VOP_UNLOCK(vp); 1390 if (error) 1391 goto out; 1392 vref(vp); 1393 1394 change_root(p->p_cwdi, vp, l); 1395 1396 out: 1397 fd_putfile(fd); 1398 return (error); 1399 } 1400 1401 /* 1402 * Change current working directory (``.''). 1403 */ 1404 /* ARGSUSED */ 1405 int 1406 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1407 { 1408 /* { 1409 syscallarg(const char *) path; 1410 } */ 1411 struct proc *p = l->l_proc; 1412 struct cwdinfo *cwdi; 1413 int error; 1414 struct vnode *vp; 1415 1416 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1417 &vp, l)) != 0) 1418 return (error); 1419 cwdi = p->p_cwdi; 1420 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1421 vrele(cwdi->cwdi_cdir); 1422 cwdi->cwdi_cdir = vp; 1423 rw_exit(&cwdi->cwdi_lock); 1424 return (0); 1425 } 1426 1427 /* 1428 * Change notion of root (``/'') directory. 1429 */ 1430 /* ARGSUSED */ 1431 int 1432 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1433 { 1434 /* { 1435 syscallarg(const char *) path; 1436 } */ 1437 struct proc *p = l->l_proc; 1438 int error; 1439 struct vnode *vp; 1440 1441 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1442 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1443 return (error); 1444 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1445 &vp, l)) != 0) 1446 return (error); 1447 1448 change_root(p->p_cwdi, vp, l); 1449 1450 return (0); 1451 } 1452 1453 /* 1454 * Common routine for chroot and fchroot. 1455 * NB: callers need to properly authorize the change root operation. 1456 */ 1457 void 1458 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1459 { 1460 struct proc *p = l->l_proc; 1461 kauth_cred_t ncred; 1462 1463 ncred = kauth_cred_alloc(); 1464 1465 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1466 if (cwdi->cwdi_rdir != NULL) 1467 vrele(cwdi->cwdi_rdir); 1468 cwdi->cwdi_rdir = vp; 1469 1470 /* 1471 * Prevent escaping from chroot by putting the root under 1472 * the working directory. Silently chdir to / if we aren't 1473 * already there. 1474 */ 1475 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1476 /* 1477 * XXX would be more failsafe to change directory to a 1478 * deadfs node here instead 1479 */ 1480 vrele(cwdi->cwdi_cdir); 1481 vref(vp); 1482 cwdi->cwdi_cdir = vp; 1483 } 1484 rw_exit(&cwdi->cwdi_lock); 1485 1486 /* Get a write lock on the process credential. */ 1487 proc_crmod_enter(); 1488 1489 kauth_cred_clone(p->p_cred, ncred); 1490 kauth_proc_chroot(ncred, p->p_cwdi); 1491 1492 /* Broadcast our credentials to the process and other LWPs. */ 1493 proc_crmod_leave(ncred, p->p_cred, true); 1494 } 1495 1496 /* 1497 * Common routine for chroot and chdir. 1498 * XXX "where" should be enum uio_seg 1499 */ 1500 int 1501 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1502 { 1503 struct pathbuf *pb; 1504 struct nameidata nd; 1505 int error; 1506 1507 error = pathbuf_maybe_copyin(path, where, &pb); 1508 if (error) { 1509 return error; 1510 } 1511 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1512 if ((error = namei(&nd)) != 0) { 1513 pathbuf_destroy(pb); 1514 return error; 1515 } 1516 *vpp = nd.ni_vp; 1517 pathbuf_destroy(pb); 1518 1519 if ((*vpp)->v_type != VDIR) 1520 error = ENOTDIR; 1521 else 1522 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1523 1524 if (error) 1525 vput(*vpp); 1526 else 1527 VOP_UNLOCK(*vpp); 1528 return (error); 1529 } 1530 1531 /* 1532 * Internals of sys_open - path has already been converted into a pathbuf 1533 * (so we can easily reuse this function from other parts of the kernel, 1534 * like posix_spawn post-processing). 1535 */ 1536 int 1537 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1538 int open_mode, int *fd) 1539 { 1540 struct proc *p = l->l_proc; 1541 struct cwdinfo *cwdi = p->p_cwdi; 1542 file_t *fp; 1543 struct vnode *vp; 1544 int flags, cmode; 1545 int indx, error; 1546 struct nameidata nd; 1547 1548 if (open_flags & O_SEARCH) { 1549 open_flags &= ~(int)O_SEARCH; 1550 } 1551 1552 flags = FFLAGS(open_flags); 1553 if ((flags & (FREAD | FWRITE)) == 0) 1554 return EINVAL; 1555 1556 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1557 return error; 1558 } 1559 1560 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1561 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1562 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1563 if (dvp != NULL) 1564 NDAT(&nd, dvp); 1565 1566 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1567 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1568 fd_abort(p, fp, indx); 1569 if ((error == EDUPFD || error == EMOVEFD) && 1570 l->l_dupfd >= 0 && /* XXX from fdopen */ 1571 (error = 1572 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1573 *fd = indx; 1574 return 0; 1575 } 1576 if (error == ERESTART) 1577 error = EINTR; 1578 return error; 1579 } 1580 1581 l->l_dupfd = 0; 1582 vp = nd.ni_vp; 1583 1584 if ((error = open_setfp(l, fp, vp, indx, flags))) 1585 return error; 1586 1587 VOP_UNLOCK(vp); 1588 *fd = indx; 1589 fd_affix(p, fp, indx); 1590 return 0; 1591 } 1592 1593 int 1594 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1595 { 1596 struct pathbuf *pb; 1597 int error, oflags; 1598 1599 oflags = FFLAGS(open_flags); 1600 if ((oflags & (FREAD | FWRITE)) == 0) 1601 return EINVAL; 1602 1603 pb = pathbuf_create(path); 1604 if (pb == NULL) 1605 return ENOMEM; 1606 1607 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1608 pathbuf_destroy(pb); 1609 1610 return error; 1611 } 1612 1613 /* 1614 * Check permissions, allocate an open file structure, 1615 * and call the device open routine if any. 1616 */ 1617 static int 1618 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1619 int mode, int *fd) 1620 { 1621 file_t *dfp = NULL; 1622 struct vnode *dvp = NULL; 1623 struct pathbuf *pb; 1624 int error; 1625 1626 #ifdef COMPAT_10 /* XXX: and perhaps later */ 1627 if (path == NULL) 1628 pb = pathbuf_create("."); 1629 else 1630 #endif 1631 { 1632 error = pathbuf_copyin(path, &pb); 1633 if (error) 1634 return error; 1635 } 1636 1637 if (fdat != AT_FDCWD) { 1638 /* fd_getvnode() will use the descriptor for us */ 1639 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1640 goto out; 1641 1642 dvp = dfp->f_data; 1643 } 1644 1645 error = do_open(l, dvp, pb, flags, mode, fd); 1646 1647 if (dfp != NULL) 1648 fd_putfile(fdat); 1649 out: 1650 pathbuf_destroy(pb); 1651 return error; 1652 } 1653 1654 int 1655 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1656 { 1657 /* { 1658 syscallarg(const char *) path; 1659 syscallarg(int) flags; 1660 syscallarg(int) mode; 1661 } */ 1662 int error; 1663 int fd; 1664 1665 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1666 SCARG(uap, flags), SCARG(uap, mode), &fd); 1667 1668 if (error == 0) 1669 *retval = fd; 1670 1671 return error; 1672 } 1673 1674 int 1675 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1676 { 1677 /* { 1678 syscallarg(int) fd; 1679 syscallarg(const char *) path; 1680 syscallarg(int) oflags; 1681 syscallarg(int) mode; 1682 } */ 1683 int error; 1684 int fd; 1685 1686 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1687 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1688 1689 if (error == 0) 1690 *retval = fd; 1691 1692 return error; 1693 } 1694 1695 static void 1696 vfs__fhfree(fhandle_t *fhp) 1697 { 1698 size_t fhsize; 1699 1700 if (fhp == NULL) { 1701 return; 1702 } 1703 fhsize = FHANDLE_SIZE(fhp); 1704 kmem_free(fhp, fhsize); 1705 } 1706 1707 /* 1708 * vfs_composefh: compose a filehandle. 1709 */ 1710 1711 int 1712 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1713 { 1714 struct mount *mp; 1715 struct fid *fidp; 1716 int error; 1717 size_t needfhsize; 1718 size_t fidsize; 1719 1720 mp = vp->v_mount; 1721 fidp = NULL; 1722 if (*fh_size < FHANDLE_SIZE_MIN) { 1723 fidsize = 0; 1724 } else { 1725 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1726 if (fhp != NULL) { 1727 memset(fhp, 0, *fh_size); 1728 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1729 fidp = &fhp->fh_fid; 1730 } 1731 } 1732 error = VFS_VPTOFH(vp, fidp, &fidsize); 1733 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1734 if (error == 0 && *fh_size < needfhsize) { 1735 error = E2BIG; 1736 } 1737 *fh_size = needfhsize; 1738 return error; 1739 } 1740 1741 int 1742 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1743 { 1744 struct mount *mp; 1745 fhandle_t *fhp; 1746 size_t fhsize; 1747 size_t fidsize; 1748 int error; 1749 1750 *fhpp = NULL; 1751 mp = vp->v_mount; 1752 fidsize = 0; 1753 error = VFS_VPTOFH(vp, NULL, &fidsize); 1754 KASSERT(error != 0); 1755 if (error != E2BIG) { 1756 goto out; 1757 } 1758 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1759 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1760 if (fhp == NULL) { 1761 error = ENOMEM; 1762 goto out; 1763 } 1764 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1765 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1766 if (error == 0) { 1767 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1768 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1769 *fhpp = fhp; 1770 } else { 1771 kmem_free(fhp, fhsize); 1772 } 1773 out: 1774 return error; 1775 } 1776 1777 void 1778 vfs_composefh_free(fhandle_t *fhp) 1779 { 1780 1781 vfs__fhfree(fhp); 1782 } 1783 1784 /* 1785 * vfs_fhtovp: lookup a vnode by a filehandle. 1786 */ 1787 1788 int 1789 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1790 { 1791 struct mount *mp; 1792 int error; 1793 1794 *vpp = NULL; 1795 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1796 if (mp == NULL) { 1797 error = ESTALE; 1798 goto out; 1799 } 1800 if (mp->mnt_op->vfs_fhtovp == NULL) { 1801 error = EOPNOTSUPP; 1802 goto out; 1803 } 1804 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1805 out: 1806 return error; 1807 } 1808 1809 /* 1810 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1811 * the needed size. 1812 */ 1813 1814 int 1815 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1816 { 1817 fhandle_t *fhp; 1818 int error; 1819 1820 *fhpp = NULL; 1821 if (fhsize > FHANDLE_SIZE_MAX) { 1822 return EINVAL; 1823 } 1824 if (fhsize < FHANDLE_SIZE_MIN) { 1825 return EINVAL; 1826 } 1827 again: 1828 fhp = kmem_alloc(fhsize, KM_SLEEP); 1829 if (fhp == NULL) { 1830 return ENOMEM; 1831 } 1832 error = copyin(ufhp, fhp, fhsize); 1833 if (error == 0) { 1834 /* XXX this check shouldn't be here */ 1835 if (FHANDLE_SIZE(fhp) == fhsize) { 1836 *fhpp = fhp; 1837 return 0; 1838 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1839 /* 1840 * a kludge for nfsv2 padded handles. 1841 */ 1842 size_t sz; 1843 1844 sz = FHANDLE_SIZE(fhp); 1845 kmem_free(fhp, fhsize); 1846 fhsize = sz; 1847 goto again; 1848 } else { 1849 /* 1850 * userland told us wrong size. 1851 */ 1852 error = EINVAL; 1853 } 1854 } 1855 kmem_free(fhp, fhsize); 1856 return error; 1857 } 1858 1859 void 1860 vfs_copyinfh_free(fhandle_t *fhp) 1861 { 1862 1863 vfs__fhfree(fhp); 1864 } 1865 1866 /* 1867 * Get file handle system call 1868 */ 1869 int 1870 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1871 { 1872 /* { 1873 syscallarg(char *) fname; 1874 syscallarg(fhandle_t *) fhp; 1875 syscallarg(size_t *) fh_size; 1876 } */ 1877 struct vnode *vp; 1878 fhandle_t *fh; 1879 int error; 1880 struct pathbuf *pb; 1881 struct nameidata nd; 1882 size_t sz; 1883 size_t usz; 1884 1885 /* 1886 * Must be super user 1887 */ 1888 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1889 0, NULL, NULL, NULL); 1890 if (error) 1891 return (error); 1892 1893 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1894 if (error) { 1895 return error; 1896 } 1897 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1898 error = namei(&nd); 1899 if (error) { 1900 pathbuf_destroy(pb); 1901 return error; 1902 } 1903 vp = nd.ni_vp; 1904 pathbuf_destroy(pb); 1905 1906 error = vfs_composefh_alloc(vp, &fh); 1907 vput(vp); 1908 if (error != 0) { 1909 goto out; 1910 } 1911 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1912 if (error != 0) { 1913 goto out; 1914 } 1915 sz = FHANDLE_SIZE(fh); 1916 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1917 if (error != 0) { 1918 goto out; 1919 } 1920 if (usz >= sz) { 1921 error = copyout(fh, SCARG(uap, fhp), sz); 1922 } else { 1923 error = E2BIG; 1924 } 1925 out: 1926 vfs_composefh_free(fh); 1927 return (error); 1928 } 1929 1930 /* 1931 * Open a file given a file handle. 1932 * 1933 * Check permissions, allocate an open file structure, 1934 * and call the device open routine if any. 1935 */ 1936 1937 int 1938 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1939 register_t *retval) 1940 { 1941 file_t *fp; 1942 struct vnode *vp = NULL; 1943 kauth_cred_t cred = l->l_cred; 1944 file_t *nfp; 1945 int indx, error = 0; 1946 struct vattr va; 1947 fhandle_t *fh; 1948 int flags; 1949 proc_t *p; 1950 1951 p = curproc; 1952 1953 /* 1954 * Must be super user 1955 */ 1956 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1957 0, NULL, NULL, NULL))) 1958 return (error); 1959 1960 if (oflags & O_SEARCH) { 1961 oflags &= ~(int)O_SEARCH; 1962 } 1963 1964 flags = FFLAGS(oflags); 1965 if ((flags & (FREAD | FWRITE)) == 0) 1966 return (EINVAL); 1967 if ((flags & O_CREAT)) 1968 return (EINVAL); 1969 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1970 return (error); 1971 fp = nfp; 1972 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1973 if (error != 0) { 1974 goto bad; 1975 } 1976 error = vfs_fhtovp(fh, &vp); 1977 if (error != 0) { 1978 goto bad; 1979 } 1980 1981 /* Now do an effective vn_open */ 1982 1983 if (vp->v_type == VSOCK) { 1984 error = EOPNOTSUPP; 1985 goto bad; 1986 } 1987 error = vn_openchk(vp, cred, flags); 1988 if (error != 0) 1989 goto bad; 1990 if (flags & O_TRUNC) { 1991 VOP_UNLOCK(vp); /* XXX */ 1992 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1993 vattr_null(&va); 1994 va.va_size = 0; 1995 error = VOP_SETATTR(vp, &va, cred); 1996 if (error) 1997 goto bad; 1998 } 1999 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2000 goto bad; 2001 if (flags & FWRITE) { 2002 mutex_enter(vp->v_interlock); 2003 vp->v_writecount++; 2004 mutex_exit(vp->v_interlock); 2005 } 2006 2007 /* done with modified vn_open, now finish what sys_open does. */ 2008 if ((error = open_setfp(l, fp, vp, indx, flags))) 2009 return error; 2010 2011 VOP_UNLOCK(vp); 2012 *retval = indx; 2013 fd_affix(p, fp, indx); 2014 vfs_copyinfh_free(fh); 2015 return (0); 2016 2017 bad: 2018 fd_abort(p, fp, indx); 2019 if (vp != NULL) 2020 vput(vp); 2021 vfs_copyinfh_free(fh); 2022 return (error); 2023 } 2024 2025 int 2026 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2027 { 2028 /* { 2029 syscallarg(const void *) fhp; 2030 syscallarg(size_t) fh_size; 2031 syscallarg(int) flags; 2032 } */ 2033 2034 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2035 SCARG(uap, flags), retval); 2036 } 2037 2038 int 2039 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2040 { 2041 int error; 2042 fhandle_t *fh; 2043 struct vnode *vp; 2044 2045 /* 2046 * Must be super user 2047 */ 2048 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2049 0, NULL, NULL, NULL))) 2050 return (error); 2051 2052 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2053 if (error != 0) 2054 return error; 2055 2056 error = vfs_fhtovp(fh, &vp); 2057 vfs_copyinfh_free(fh); 2058 if (error != 0) 2059 return error; 2060 2061 error = vn_stat(vp, sb); 2062 vput(vp); 2063 return error; 2064 } 2065 2066 2067 /* ARGSUSED */ 2068 int 2069 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2070 { 2071 /* { 2072 syscallarg(const void *) fhp; 2073 syscallarg(size_t) fh_size; 2074 syscallarg(struct stat *) sb; 2075 } */ 2076 struct stat sb; 2077 int error; 2078 2079 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2080 if (error) 2081 return error; 2082 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2083 } 2084 2085 int 2086 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2087 int flags) 2088 { 2089 fhandle_t *fh; 2090 struct mount *mp; 2091 struct vnode *vp; 2092 int error; 2093 2094 /* 2095 * Must be super user 2096 */ 2097 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2098 0, NULL, NULL, NULL))) 2099 return error; 2100 2101 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2102 if (error != 0) 2103 return error; 2104 2105 error = vfs_fhtovp(fh, &vp); 2106 vfs_copyinfh_free(fh); 2107 if (error != 0) 2108 return error; 2109 2110 mp = vp->v_mount; 2111 error = dostatvfs(mp, sb, l, flags, 1); 2112 vput(vp); 2113 return error; 2114 } 2115 2116 /* ARGSUSED */ 2117 int 2118 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2119 { 2120 /* { 2121 syscallarg(const void *) fhp; 2122 syscallarg(size_t) fh_size; 2123 syscallarg(struct statvfs *) buf; 2124 syscallarg(int) flags; 2125 } */ 2126 struct statvfs *sb = STATVFSBUF_GET(); 2127 int error; 2128 2129 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2130 SCARG(uap, flags)); 2131 if (error == 0) 2132 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2133 STATVFSBUF_PUT(sb); 2134 return error; 2135 } 2136 2137 /* 2138 * Create a special file. 2139 */ 2140 /* ARGSUSED */ 2141 int 2142 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2143 register_t *retval) 2144 { 2145 /* { 2146 syscallarg(const char *) path; 2147 syscallarg(mode_t) mode; 2148 syscallarg(dev_t) dev; 2149 } */ 2150 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2151 SCARG(uap, dev), retval, UIO_USERSPACE); 2152 } 2153 2154 int 2155 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2156 register_t *retval) 2157 { 2158 /* { 2159 syscallarg(int) fd; 2160 syscallarg(const char *) path; 2161 syscallarg(mode_t) mode; 2162 syscallarg(int) pad; 2163 syscallarg(dev_t) dev; 2164 } */ 2165 2166 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2167 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2168 } 2169 2170 int 2171 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2172 register_t *retval, enum uio_seg seg) 2173 { 2174 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2175 } 2176 2177 int 2178 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2179 dev_t dev, register_t *retval, enum uio_seg seg) 2180 { 2181 struct proc *p = l->l_proc; 2182 struct vnode *vp; 2183 struct vattr vattr; 2184 int error, optype; 2185 struct pathbuf *pb; 2186 struct nameidata nd; 2187 const char *pathstring; 2188 2189 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2190 0, NULL, NULL, NULL)) != 0) 2191 return (error); 2192 2193 optype = VOP_MKNOD_DESCOFFSET; 2194 2195 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2196 if (error) { 2197 return error; 2198 } 2199 pathstring = pathbuf_stringcopy_get(pb); 2200 if (pathstring == NULL) { 2201 pathbuf_destroy(pb); 2202 return ENOMEM; 2203 } 2204 2205 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2206 2207 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2208 goto out; 2209 vp = nd.ni_vp; 2210 2211 if (vp != NULL) 2212 error = EEXIST; 2213 else { 2214 vattr_null(&vattr); 2215 /* We will read cwdi->cwdi_cmask unlocked. */ 2216 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2217 vattr.va_rdev = dev; 2218 2219 switch (mode & S_IFMT) { 2220 case S_IFMT: /* used by badsect to flag bad sectors */ 2221 vattr.va_type = VBAD; 2222 break; 2223 case S_IFCHR: 2224 vattr.va_type = VCHR; 2225 break; 2226 case S_IFBLK: 2227 vattr.va_type = VBLK; 2228 break; 2229 case S_IFWHT: 2230 optype = VOP_WHITEOUT_DESCOFFSET; 2231 break; 2232 case S_IFREG: 2233 #if NVERIEXEC > 0 2234 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2235 O_CREAT); 2236 #endif /* NVERIEXEC > 0 */ 2237 vattr.va_type = VREG; 2238 vattr.va_rdev = VNOVAL; 2239 optype = VOP_CREATE_DESCOFFSET; 2240 break; 2241 default: 2242 error = EINVAL; 2243 break; 2244 } 2245 } 2246 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2247 && vattr.va_rdev == VNOVAL) 2248 error = EINVAL; 2249 if (!error) { 2250 switch (optype) { 2251 case VOP_WHITEOUT_DESCOFFSET: 2252 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2253 if (error) 2254 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2255 vput(nd.ni_dvp); 2256 break; 2257 2258 case VOP_MKNOD_DESCOFFSET: 2259 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2260 &nd.ni_cnd, &vattr); 2261 if (error == 0) 2262 vrele(nd.ni_vp); 2263 vput(nd.ni_dvp); 2264 break; 2265 2266 case VOP_CREATE_DESCOFFSET: 2267 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2268 &nd.ni_cnd, &vattr); 2269 if (error == 0) 2270 vrele(nd.ni_vp); 2271 vput(nd.ni_dvp); 2272 break; 2273 } 2274 } else { 2275 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2276 if (nd.ni_dvp == vp) 2277 vrele(nd.ni_dvp); 2278 else 2279 vput(nd.ni_dvp); 2280 if (vp) 2281 vrele(vp); 2282 } 2283 out: 2284 pathbuf_stringcopy_put(pb, pathstring); 2285 pathbuf_destroy(pb); 2286 return (error); 2287 } 2288 2289 /* 2290 * Create a named pipe. 2291 */ 2292 /* ARGSUSED */ 2293 int 2294 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2295 { 2296 /* { 2297 syscallarg(const char *) path; 2298 syscallarg(int) mode; 2299 } */ 2300 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2301 } 2302 2303 int 2304 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2305 register_t *retval) 2306 { 2307 /* { 2308 syscallarg(int) fd; 2309 syscallarg(const char *) path; 2310 syscallarg(int) mode; 2311 } */ 2312 2313 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2314 SCARG(uap, mode)); 2315 } 2316 2317 static int 2318 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2319 { 2320 struct proc *p = l->l_proc; 2321 struct vattr vattr; 2322 int error; 2323 struct pathbuf *pb; 2324 struct nameidata nd; 2325 2326 error = pathbuf_copyin(path, &pb); 2327 if (error) { 2328 return error; 2329 } 2330 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2331 2332 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2333 pathbuf_destroy(pb); 2334 return error; 2335 } 2336 if (nd.ni_vp != NULL) { 2337 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2338 if (nd.ni_dvp == nd.ni_vp) 2339 vrele(nd.ni_dvp); 2340 else 2341 vput(nd.ni_dvp); 2342 vrele(nd.ni_vp); 2343 pathbuf_destroy(pb); 2344 return (EEXIST); 2345 } 2346 vattr_null(&vattr); 2347 vattr.va_type = VFIFO; 2348 /* We will read cwdi->cwdi_cmask unlocked. */ 2349 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2350 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2351 if (error == 0) 2352 vrele(nd.ni_vp); 2353 vput(nd.ni_dvp); 2354 pathbuf_destroy(pb); 2355 return (error); 2356 } 2357 2358 /* 2359 * Make a hard file link. 2360 */ 2361 /* ARGSUSED */ 2362 int 2363 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2364 const char *link, int follow, register_t *retval) 2365 { 2366 struct vnode *vp; 2367 struct pathbuf *linkpb; 2368 struct nameidata nd; 2369 namei_simple_flags_t ns_flags; 2370 int error; 2371 2372 if (follow & AT_SYMLINK_FOLLOW) 2373 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2374 else 2375 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2376 2377 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2378 if (error != 0) 2379 return (error); 2380 error = pathbuf_copyin(link, &linkpb); 2381 if (error) { 2382 goto out1; 2383 } 2384 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2385 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2386 goto out2; 2387 if (nd.ni_vp) { 2388 error = EEXIST; 2389 goto abortop; 2390 } 2391 /* Prevent hard links on directories. */ 2392 if (vp->v_type == VDIR) { 2393 error = EPERM; 2394 goto abortop; 2395 } 2396 /* Prevent cross-mount operation. */ 2397 if (nd.ni_dvp->v_mount != vp->v_mount) { 2398 error = EXDEV; 2399 goto abortop; 2400 } 2401 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2402 out2: 2403 pathbuf_destroy(linkpb); 2404 out1: 2405 vrele(vp); 2406 return (error); 2407 abortop: 2408 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2409 if (nd.ni_dvp == nd.ni_vp) 2410 vrele(nd.ni_dvp); 2411 else 2412 vput(nd.ni_dvp); 2413 if (nd.ni_vp != NULL) 2414 vrele(nd.ni_vp); 2415 goto out2; 2416 } 2417 2418 int 2419 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2420 { 2421 /* { 2422 syscallarg(const char *) path; 2423 syscallarg(const char *) link; 2424 } */ 2425 const char *path = SCARG(uap, path); 2426 const char *link = SCARG(uap, link); 2427 2428 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2429 AT_SYMLINK_FOLLOW, retval); 2430 } 2431 2432 int 2433 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2434 register_t *retval) 2435 { 2436 /* { 2437 syscallarg(int) fd1; 2438 syscallarg(const char *) name1; 2439 syscallarg(int) fd2; 2440 syscallarg(const char *) name2; 2441 syscallarg(int) flags; 2442 } */ 2443 int fd1 = SCARG(uap, fd1); 2444 const char *name1 = SCARG(uap, name1); 2445 int fd2 = SCARG(uap, fd2); 2446 const char *name2 = SCARG(uap, name2); 2447 int follow; 2448 2449 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2450 2451 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2452 } 2453 2454 2455 int 2456 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2457 { 2458 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2459 } 2460 2461 static int 2462 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2463 const char *link, enum uio_seg seg) 2464 { 2465 struct proc *p = curproc; 2466 struct vattr vattr; 2467 char *path; 2468 int error; 2469 struct pathbuf *linkpb; 2470 struct nameidata nd; 2471 2472 KASSERT(l != NULL || fdat == AT_FDCWD); 2473 2474 path = PNBUF_GET(); 2475 if (seg == UIO_USERSPACE) { 2476 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2477 goto out1; 2478 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2479 goto out1; 2480 } else { 2481 KASSERT(strlen(patharg) < MAXPATHLEN); 2482 strcpy(path, patharg); 2483 linkpb = pathbuf_create(link); 2484 if (linkpb == NULL) { 2485 error = ENOMEM; 2486 goto out1; 2487 } 2488 } 2489 ktrkuser("symlink-target", path, strlen(path)); 2490 2491 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2492 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2493 goto out2; 2494 if (nd.ni_vp) { 2495 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2496 if (nd.ni_dvp == nd.ni_vp) 2497 vrele(nd.ni_dvp); 2498 else 2499 vput(nd.ni_dvp); 2500 vrele(nd.ni_vp); 2501 error = EEXIST; 2502 goto out2; 2503 } 2504 vattr_null(&vattr); 2505 vattr.va_type = VLNK; 2506 /* We will read cwdi->cwdi_cmask unlocked. */ 2507 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2508 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2509 if (error == 0) 2510 vrele(nd.ni_vp); 2511 vput(nd.ni_dvp); 2512 out2: 2513 pathbuf_destroy(linkpb); 2514 out1: 2515 PNBUF_PUT(path); 2516 return (error); 2517 } 2518 2519 /* 2520 * Make a symbolic link. 2521 */ 2522 /* ARGSUSED */ 2523 int 2524 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2525 { 2526 /* { 2527 syscallarg(const char *) path; 2528 syscallarg(const char *) link; 2529 } */ 2530 2531 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2532 UIO_USERSPACE); 2533 } 2534 2535 int 2536 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2537 register_t *retval) 2538 { 2539 /* { 2540 syscallarg(const char *) path1; 2541 syscallarg(int) fd; 2542 syscallarg(const char *) path2; 2543 } */ 2544 2545 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2546 SCARG(uap, path2), UIO_USERSPACE); 2547 } 2548 2549 /* 2550 * Delete a whiteout from the filesystem. 2551 */ 2552 /* ARGSUSED */ 2553 int 2554 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2555 { 2556 /* { 2557 syscallarg(const char *) path; 2558 } */ 2559 int error; 2560 struct pathbuf *pb; 2561 struct nameidata nd; 2562 2563 error = pathbuf_copyin(SCARG(uap, path), &pb); 2564 if (error) { 2565 return error; 2566 } 2567 2568 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2569 error = namei(&nd); 2570 if (error) { 2571 pathbuf_destroy(pb); 2572 return (error); 2573 } 2574 2575 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2576 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2577 if (nd.ni_dvp == nd.ni_vp) 2578 vrele(nd.ni_dvp); 2579 else 2580 vput(nd.ni_dvp); 2581 if (nd.ni_vp) 2582 vrele(nd.ni_vp); 2583 pathbuf_destroy(pb); 2584 return (EEXIST); 2585 } 2586 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2587 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2588 vput(nd.ni_dvp); 2589 pathbuf_destroy(pb); 2590 return (error); 2591 } 2592 2593 /* 2594 * Delete a name from the filesystem. 2595 */ 2596 /* ARGSUSED */ 2597 int 2598 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2599 { 2600 /* { 2601 syscallarg(const char *) path; 2602 } */ 2603 2604 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2605 } 2606 2607 int 2608 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2609 register_t *retval) 2610 { 2611 /* { 2612 syscallarg(int) fd; 2613 syscallarg(const char *) path; 2614 syscallarg(int) flag; 2615 } */ 2616 2617 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2618 SCARG(uap, flag), UIO_USERSPACE); 2619 } 2620 2621 int 2622 do_sys_unlink(const char *arg, enum uio_seg seg) 2623 { 2624 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2625 } 2626 2627 static int 2628 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2629 enum uio_seg seg) 2630 { 2631 struct vnode *vp; 2632 int error; 2633 struct pathbuf *pb; 2634 struct nameidata nd; 2635 const char *pathstring; 2636 2637 KASSERT(l != NULL || fdat == AT_FDCWD); 2638 2639 error = pathbuf_maybe_copyin(arg, seg, &pb); 2640 if (error) { 2641 return error; 2642 } 2643 pathstring = pathbuf_stringcopy_get(pb); 2644 if (pathstring == NULL) { 2645 pathbuf_destroy(pb); 2646 return ENOMEM; 2647 } 2648 2649 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2650 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2651 goto out; 2652 vp = nd.ni_vp; 2653 2654 /* 2655 * The root of a mounted filesystem cannot be deleted. 2656 */ 2657 if ((vp->v_vflag & VV_ROOT) != 0) { 2658 error = EBUSY; 2659 goto abort; 2660 } 2661 2662 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2663 error = EBUSY; 2664 goto abort; 2665 } 2666 2667 /* 2668 * No rmdir "." please. 2669 */ 2670 if (nd.ni_dvp == vp) { 2671 error = EINVAL; 2672 goto abort; 2673 } 2674 2675 /* 2676 * AT_REMOVEDIR is required to remove a directory 2677 */ 2678 if (vp->v_type == VDIR) { 2679 if (!(flags & AT_REMOVEDIR)) { 2680 error = EPERM; 2681 goto abort; 2682 } else { 2683 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2684 goto out; 2685 } 2686 } 2687 2688 /* 2689 * Starting here we only deal with non directories. 2690 */ 2691 if (flags & AT_REMOVEDIR) { 2692 error = ENOTDIR; 2693 goto abort; 2694 } 2695 2696 2697 #if NVERIEXEC > 0 2698 /* Handle remove requests for veriexec entries. */ 2699 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2700 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2701 if (nd.ni_dvp == vp) 2702 vrele(nd.ni_dvp); 2703 else 2704 vput(nd.ni_dvp); 2705 vput(vp); 2706 goto out; 2707 } 2708 #endif /* NVERIEXEC > 0 */ 2709 2710 #ifdef FILEASSOC 2711 (void)fileassoc_file_delete(vp); 2712 #endif /* FILEASSOC */ 2713 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2714 goto out; 2715 2716 abort: 2717 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2718 if (nd.ni_dvp == vp) 2719 vrele(nd.ni_dvp); 2720 else 2721 vput(nd.ni_dvp); 2722 vput(vp); 2723 2724 out: 2725 pathbuf_stringcopy_put(pb, pathstring); 2726 pathbuf_destroy(pb); 2727 return (error); 2728 } 2729 2730 /* 2731 * Reposition read/write file offset. 2732 */ 2733 int 2734 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2735 { 2736 /* { 2737 syscallarg(int) fd; 2738 syscallarg(int) pad; 2739 syscallarg(off_t) offset; 2740 syscallarg(int) whence; 2741 } */ 2742 kauth_cred_t cred = l->l_cred; 2743 file_t *fp; 2744 struct vnode *vp; 2745 struct vattr vattr; 2746 off_t newoff; 2747 int error, fd; 2748 2749 fd = SCARG(uap, fd); 2750 2751 if ((fp = fd_getfile(fd)) == NULL) 2752 return (EBADF); 2753 2754 vp = fp->f_data; 2755 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2756 error = ESPIPE; 2757 goto out; 2758 } 2759 2760 switch (SCARG(uap, whence)) { 2761 case SEEK_CUR: 2762 newoff = fp->f_offset + SCARG(uap, offset); 2763 break; 2764 case SEEK_END: 2765 vn_lock(vp, LK_SHARED | LK_RETRY); 2766 error = VOP_GETATTR(vp, &vattr, cred); 2767 VOP_UNLOCK(vp); 2768 if (error) { 2769 goto out; 2770 } 2771 newoff = SCARG(uap, offset) + vattr.va_size; 2772 break; 2773 case SEEK_SET: 2774 newoff = SCARG(uap, offset); 2775 break; 2776 default: 2777 error = EINVAL; 2778 goto out; 2779 } 2780 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2781 *(off_t *)retval = fp->f_offset = newoff; 2782 } 2783 out: 2784 fd_putfile(fd); 2785 return (error); 2786 } 2787 2788 /* 2789 * Positional read system call. 2790 */ 2791 int 2792 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2793 { 2794 /* { 2795 syscallarg(int) fd; 2796 syscallarg(void *) buf; 2797 syscallarg(size_t) nbyte; 2798 syscallarg(off_t) offset; 2799 } */ 2800 file_t *fp; 2801 struct vnode *vp; 2802 off_t offset; 2803 int error, fd = SCARG(uap, fd); 2804 2805 if ((fp = fd_getfile(fd)) == NULL) 2806 return (EBADF); 2807 2808 if ((fp->f_flag & FREAD) == 0) { 2809 fd_putfile(fd); 2810 return (EBADF); 2811 } 2812 2813 vp = fp->f_data; 2814 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2815 error = ESPIPE; 2816 goto out; 2817 } 2818 2819 offset = SCARG(uap, offset); 2820 2821 /* 2822 * XXX This works because no file systems actually 2823 * XXX take any action on the seek operation. 2824 */ 2825 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2826 goto out; 2827 2828 /* dofileread() will unuse the descriptor for us */ 2829 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2830 &offset, 0, retval)); 2831 2832 out: 2833 fd_putfile(fd); 2834 return (error); 2835 } 2836 2837 /* 2838 * Positional scatter read system call. 2839 */ 2840 int 2841 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2842 { 2843 /* { 2844 syscallarg(int) fd; 2845 syscallarg(const struct iovec *) iovp; 2846 syscallarg(int) iovcnt; 2847 syscallarg(off_t) offset; 2848 } */ 2849 off_t offset = SCARG(uap, offset); 2850 2851 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2852 SCARG(uap, iovcnt), &offset, 0, retval); 2853 } 2854 2855 /* 2856 * Positional write system call. 2857 */ 2858 int 2859 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2860 { 2861 /* { 2862 syscallarg(int) fd; 2863 syscallarg(const void *) buf; 2864 syscallarg(size_t) nbyte; 2865 syscallarg(off_t) offset; 2866 } */ 2867 file_t *fp; 2868 struct vnode *vp; 2869 off_t offset; 2870 int error, fd = SCARG(uap, fd); 2871 2872 if ((fp = fd_getfile(fd)) == NULL) 2873 return (EBADF); 2874 2875 if ((fp->f_flag & FWRITE) == 0) { 2876 fd_putfile(fd); 2877 return (EBADF); 2878 } 2879 2880 vp = fp->f_data; 2881 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2882 error = ESPIPE; 2883 goto out; 2884 } 2885 2886 offset = SCARG(uap, offset); 2887 2888 /* 2889 * XXX This works because no file systems actually 2890 * XXX take any action on the seek operation. 2891 */ 2892 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2893 goto out; 2894 2895 /* dofilewrite() will unuse the descriptor for us */ 2896 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2897 &offset, 0, retval)); 2898 2899 out: 2900 fd_putfile(fd); 2901 return (error); 2902 } 2903 2904 /* 2905 * Positional gather write system call. 2906 */ 2907 int 2908 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2909 { 2910 /* { 2911 syscallarg(int) fd; 2912 syscallarg(const struct iovec *) iovp; 2913 syscallarg(int) iovcnt; 2914 syscallarg(off_t) offset; 2915 } */ 2916 off_t offset = SCARG(uap, offset); 2917 2918 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2919 SCARG(uap, iovcnt), &offset, 0, retval); 2920 } 2921 2922 /* 2923 * Check access permissions. 2924 */ 2925 int 2926 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2927 { 2928 /* { 2929 syscallarg(const char *) path; 2930 syscallarg(int) flags; 2931 } */ 2932 2933 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2934 SCARG(uap, flags), 0); 2935 } 2936 2937 int 2938 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2939 int mode, int flags) 2940 { 2941 kauth_cred_t cred; 2942 struct vnode *vp; 2943 int error, nd_flag, vmode; 2944 struct pathbuf *pb; 2945 struct nameidata nd; 2946 2947 CTASSERT(F_OK == 0); 2948 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2949 /* nonsense mode */ 2950 return EINVAL; 2951 } 2952 2953 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2954 if (flags & AT_SYMLINK_NOFOLLOW) 2955 nd_flag &= ~FOLLOW; 2956 2957 error = pathbuf_copyin(path, &pb); 2958 if (error) 2959 return error; 2960 2961 NDINIT(&nd, LOOKUP, nd_flag, pb); 2962 2963 /* Override default credentials */ 2964 cred = kauth_cred_dup(l->l_cred); 2965 if (!(flags & AT_EACCESS)) { 2966 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2967 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2968 } 2969 nd.ni_cnd.cn_cred = cred; 2970 2971 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2972 pathbuf_destroy(pb); 2973 goto out; 2974 } 2975 vp = nd.ni_vp; 2976 pathbuf_destroy(pb); 2977 2978 /* Flags == 0 means only check for existence. */ 2979 if (mode) { 2980 vmode = 0; 2981 if (mode & R_OK) 2982 vmode |= VREAD; 2983 if (mode & W_OK) 2984 vmode |= VWRITE; 2985 if (mode & X_OK) 2986 vmode |= VEXEC; 2987 2988 error = VOP_ACCESS(vp, vmode, cred); 2989 if (!error && (vmode & VWRITE)) 2990 error = vn_writechk(vp); 2991 } 2992 vput(vp); 2993 out: 2994 kauth_cred_free(cred); 2995 return (error); 2996 } 2997 2998 int 2999 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3000 register_t *retval) 3001 { 3002 /* { 3003 syscallarg(int) fd; 3004 syscallarg(const char *) path; 3005 syscallarg(int) amode; 3006 syscallarg(int) flag; 3007 } */ 3008 3009 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3010 SCARG(uap, amode), SCARG(uap, flag)); 3011 } 3012 3013 /* 3014 * Common code for all sys_stat functions, including compat versions. 3015 */ 3016 int 3017 do_sys_stat(const char *userpath, unsigned int nd_flag, 3018 struct stat *sb) 3019 { 3020 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3021 } 3022 3023 int 3024 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3025 unsigned int nd_flag, struct stat *sb) 3026 { 3027 int error; 3028 struct pathbuf *pb; 3029 struct nameidata nd; 3030 3031 KASSERT(l != NULL || fdat == AT_FDCWD); 3032 3033 error = pathbuf_copyin(userpath, &pb); 3034 if (error) { 3035 return error; 3036 } 3037 3038 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3039 3040 error = fd_nameiat(l, fdat, &nd); 3041 if (error != 0) { 3042 pathbuf_destroy(pb); 3043 return error; 3044 } 3045 error = vn_stat(nd.ni_vp, sb); 3046 vput(nd.ni_vp); 3047 pathbuf_destroy(pb); 3048 return error; 3049 } 3050 3051 /* 3052 * Get file status; this version follows links. 3053 */ 3054 /* ARGSUSED */ 3055 int 3056 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3057 { 3058 /* { 3059 syscallarg(const char *) path; 3060 syscallarg(struct stat *) ub; 3061 } */ 3062 struct stat sb; 3063 int error; 3064 3065 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3066 if (error) 3067 return error; 3068 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3069 } 3070 3071 /* 3072 * Get file status; this version does not follow links. 3073 */ 3074 /* ARGSUSED */ 3075 int 3076 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3077 { 3078 /* { 3079 syscallarg(const char *) path; 3080 syscallarg(struct stat *) ub; 3081 } */ 3082 struct stat sb; 3083 int error; 3084 3085 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3086 if (error) 3087 return error; 3088 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3089 } 3090 3091 int 3092 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3093 register_t *retval) 3094 { 3095 /* { 3096 syscallarg(int) fd; 3097 syscallarg(const char *) path; 3098 syscallarg(struct stat *) buf; 3099 syscallarg(int) flag; 3100 } */ 3101 unsigned int nd_flag; 3102 struct stat sb; 3103 int error; 3104 3105 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3106 nd_flag = NOFOLLOW; 3107 else 3108 nd_flag = FOLLOW; 3109 3110 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3111 &sb); 3112 if (error) 3113 return error; 3114 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3115 } 3116 3117 /* 3118 * Get configurable pathname variables. 3119 */ 3120 /* ARGSUSED */ 3121 int 3122 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3123 { 3124 /* { 3125 syscallarg(const char *) path; 3126 syscallarg(int) name; 3127 } */ 3128 int error; 3129 struct pathbuf *pb; 3130 struct nameidata nd; 3131 3132 error = pathbuf_copyin(SCARG(uap, path), &pb); 3133 if (error) { 3134 return error; 3135 } 3136 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3137 if ((error = namei(&nd)) != 0) { 3138 pathbuf_destroy(pb); 3139 return (error); 3140 } 3141 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3142 vput(nd.ni_vp); 3143 pathbuf_destroy(pb); 3144 return (error); 3145 } 3146 3147 /* 3148 * Return target name of a symbolic link. 3149 */ 3150 /* ARGSUSED */ 3151 int 3152 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3153 register_t *retval) 3154 { 3155 /* { 3156 syscallarg(const char *) path; 3157 syscallarg(char *) buf; 3158 syscallarg(size_t) count; 3159 } */ 3160 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3161 SCARG(uap, buf), SCARG(uap, count), retval); 3162 } 3163 3164 static int 3165 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3166 size_t count, register_t *retval) 3167 { 3168 struct vnode *vp; 3169 struct iovec aiov; 3170 struct uio auio; 3171 int error; 3172 struct pathbuf *pb; 3173 struct nameidata nd; 3174 3175 error = pathbuf_copyin(path, &pb); 3176 if (error) { 3177 return error; 3178 } 3179 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3180 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3181 pathbuf_destroy(pb); 3182 return error; 3183 } 3184 vp = nd.ni_vp; 3185 pathbuf_destroy(pb); 3186 if (vp->v_type != VLNK) 3187 error = EINVAL; 3188 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3189 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3190 aiov.iov_base = buf; 3191 aiov.iov_len = count; 3192 auio.uio_iov = &aiov; 3193 auio.uio_iovcnt = 1; 3194 auio.uio_offset = 0; 3195 auio.uio_rw = UIO_READ; 3196 KASSERT(l == curlwp); 3197 auio.uio_vmspace = l->l_proc->p_vmspace; 3198 auio.uio_resid = count; 3199 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3200 *retval = count - auio.uio_resid; 3201 } 3202 vput(vp); 3203 return (error); 3204 } 3205 3206 int 3207 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3208 register_t *retval) 3209 { 3210 /* { 3211 syscallarg(int) fd; 3212 syscallarg(const char *) path; 3213 syscallarg(char *) buf; 3214 syscallarg(size_t) bufsize; 3215 } */ 3216 3217 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3218 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3219 } 3220 3221 /* 3222 * Change flags of a file given a path name. 3223 */ 3224 /* ARGSUSED */ 3225 int 3226 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3227 { 3228 /* { 3229 syscallarg(const char *) path; 3230 syscallarg(u_long) flags; 3231 } */ 3232 struct vnode *vp; 3233 int error; 3234 3235 error = namei_simple_user(SCARG(uap, path), 3236 NSM_FOLLOW_TRYEMULROOT, &vp); 3237 if (error != 0) 3238 return (error); 3239 error = change_flags(vp, SCARG(uap, flags), l); 3240 vput(vp); 3241 return (error); 3242 } 3243 3244 /* 3245 * Change flags of a file given a file descriptor. 3246 */ 3247 /* ARGSUSED */ 3248 int 3249 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3250 { 3251 /* { 3252 syscallarg(int) fd; 3253 syscallarg(u_long) flags; 3254 } */ 3255 struct vnode *vp; 3256 file_t *fp; 3257 int error; 3258 3259 /* fd_getvnode() will use the descriptor for us */ 3260 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3261 return (error); 3262 vp = fp->f_data; 3263 error = change_flags(vp, SCARG(uap, flags), l); 3264 VOP_UNLOCK(vp); 3265 fd_putfile(SCARG(uap, fd)); 3266 return (error); 3267 } 3268 3269 /* 3270 * Change flags of a file given a path name; this version does 3271 * not follow links. 3272 */ 3273 int 3274 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3275 { 3276 /* { 3277 syscallarg(const char *) path; 3278 syscallarg(u_long) flags; 3279 } */ 3280 struct vnode *vp; 3281 int error; 3282 3283 error = namei_simple_user(SCARG(uap, path), 3284 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3285 if (error != 0) 3286 return (error); 3287 error = change_flags(vp, SCARG(uap, flags), l); 3288 vput(vp); 3289 return (error); 3290 } 3291 3292 /* 3293 * Common routine to change flags of a file. 3294 */ 3295 int 3296 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3297 { 3298 struct vattr vattr; 3299 int error; 3300 3301 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3302 3303 vattr_null(&vattr); 3304 vattr.va_flags = flags; 3305 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3306 3307 return (error); 3308 } 3309 3310 /* 3311 * Change mode of a file given path name; this version follows links. 3312 */ 3313 /* ARGSUSED */ 3314 int 3315 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3316 { 3317 /* { 3318 syscallarg(const char *) path; 3319 syscallarg(int) mode; 3320 } */ 3321 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3322 SCARG(uap, mode), 0); 3323 } 3324 3325 int 3326 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3327 { 3328 int error; 3329 struct vnode *vp; 3330 namei_simple_flags_t ns_flag; 3331 3332 if (flags & AT_SYMLINK_NOFOLLOW) 3333 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3334 else 3335 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3336 3337 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3338 if (error != 0) 3339 return error; 3340 3341 error = change_mode(vp, mode, l); 3342 3343 vrele(vp); 3344 3345 return (error); 3346 } 3347 3348 /* 3349 * Change mode of a file given a file descriptor. 3350 */ 3351 /* ARGSUSED */ 3352 int 3353 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3354 { 3355 /* { 3356 syscallarg(int) fd; 3357 syscallarg(int) mode; 3358 } */ 3359 file_t *fp; 3360 int error; 3361 3362 /* fd_getvnode() will use the descriptor for us */ 3363 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3364 return (error); 3365 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3366 fd_putfile(SCARG(uap, fd)); 3367 return (error); 3368 } 3369 3370 int 3371 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3372 register_t *retval) 3373 { 3374 /* { 3375 syscallarg(int) fd; 3376 syscallarg(const char *) path; 3377 syscallarg(int) mode; 3378 syscallarg(int) flag; 3379 } */ 3380 3381 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3382 SCARG(uap, mode), SCARG(uap, flag)); 3383 } 3384 3385 /* 3386 * Change mode of a file given path name; this version does not follow links. 3387 */ 3388 /* ARGSUSED */ 3389 int 3390 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3391 { 3392 /* { 3393 syscallarg(const char *) path; 3394 syscallarg(int) mode; 3395 } */ 3396 int error; 3397 struct vnode *vp; 3398 3399 error = namei_simple_user(SCARG(uap, path), 3400 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3401 if (error != 0) 3402 return (error); 3403 3404 error = change_mode(vp, SCARG(uap, mode), l); 3405 3406 vrele(vp); 3407 return (error); 3408 } 3409 3410 /* 3411 * Common routine to set mode given a vnode. 3412 */ 3413 static int 3414 change_mode(struct vnode *vp, int mode, struct lwp *l) 3415 { 3416 struct vattr vattr; 3417 int error; 3418 3419 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3420 vattr_null(&vattr); 3421 vattr.va_mode = mode & ALLPERMS; 3422 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3423 VOP_UNLOCK(vp); 3424 return (error); 3425 } 3426 3427 /* 3428 * Set ownership given a path name; this version follows links. 3429 */ 3430 /* ARGSUSED */ 3431 int 3432 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3433 { 3434 /* { 3435 syscallarg(const char *) path; 3436 syscallarg(uid_t) uid; 3437 syscallarg(gid_t) gid; 3438 } */ 3439 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3440 SCARG(uap, gid), 0); 3441 } 3442 3443 int 3444 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3445 gid_t gid, int flags) 3446 { 3447 int error; 3448 struct vnode *vp; 3449 namei_simple_flags_t ns_flag; 3450 3451 if (flags & AT_SYMLINK_NOFOLLOW) 3452 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3453 else 3454 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3455 3456 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3457 if (error != 0) 3458 return error; 3459 3460 error = change_owner(vp, uid, gid, l, 0); 3461 3462 vrele(vp); 3463 3464 return (error); 3465 } 3466 3467 /* 3468 * Set ownership given a path name; this version follows links. 3469 * Provides POSIX semantics. 3470 */ 3471 /* ARGSUSED */ 3472 int 3473 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3474 { 3475 /* { 3476 syscallarg(const char *) path; 3477 syscallarg(uid_t) uid; 3478 syscallarg(gid_t) gid; 3479 } */ 3480 int error; 3481 struct vnode *vp; 3482 3483 error = namei_simple_user(SCARG(uap, path), 3484 NSM_FOLLOW_TRYEMULROOT, &vp); 3485 if (error != 0) 3486 return (error); 3487 3488 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3489 3490 vrele(vp); 3491 return (error); 3492 } 3493 3494 /* 3495 * Set ownership given a file descriptor. 3496 */ 3497 /* ARGSUSED */ 3498 int 3499 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3500 { 3501 /* { 3502 syscallarg(int) fd; 3503 syscallarg(uid_t) uid; 3504 syscallarg(gid_t) gid; 3505 } */ 3506 int error; 3507 file_t *fp; 3508 3509 /* fd_getvnode() will use the descriptor for us */ 3510 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3511 return (error); 3512 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3513 l, 0); 3514 fd_putfile(SCARG(uap, fd)); 3515 return (error); 3516 } 3517 3518 int 3519 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3520 register_t *retval) 3521 { 3522 /* { 3523 syscallarg(int) fd; 3524 syscallarg(const char *) path; 3525 syscallarg(uid_t) owner; 3526 syscallarg(gid_t) group; 3527 syscallarg(int) flag; 3528 } */ 3529 3530 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3531 SCARG(uap, owner), SCARG(uap, group), 3532 SCARG(uap, flag)); 3533 } 3534 3535 /* 3536 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3537 */ 3538 /* ARGSUSED */ 3539 int 3540 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3541 { 3542 /* { 3543 syscallarg(int) fd; 3544 syscallarg(uid_t) uid; 3545 syscallarg(gid_t) gid; 3546 } */ 3547 int error; 3548 file_t *fp; 3549 3550 /* fd_getvnode() will use the descriptor for us */ 3551 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3552 return (error); 3553 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3554 l, 1); 3555 fd_putfile(SCARG(uap, fd)); 3556 return (error); 3557 } 3558 3559 /* 3560 * Set ownership given a path name; this version does not follow links. 3561 */ 3562 /* ARGSUSED */ 3563 int 3564 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3565 { 3566 /* { 3567 syscallarg(const char *) path; 3568 syscallarg(uid_t) uid; 3569 syscallarg(gid_t) gid; 3570 } */ 3571 int error; 3572 struct vnode *vp; 3573 3574 error = namei_simple_user(SCARG(uap, path), 3575 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3576 if (error != 0) 3577 return (error); 3578 3579 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3580 3581 vrele(vp); 3582 return (error); 3583 } 3584 3585 /* 3586 * Set ownership given a path name; this version does not follow links. 3587 * Provides POSIX/XPG semantics. 3588 */ 3589 /* ARGSUSED */ 3590 int 3591 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3592 { 3593 /* { 3594 syscallarg(const char *) path; 3595 syscallarg(uid_t) uid; 3596 syscallarg(gid_t) gid; 3597 } */ 3598 int error; 3599 struct vnode *vp; 3600 3601 error = namei_simple_user(SCARG(uap, path), 3602 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3603 if (error != 0) 3604 return (error); 3605 3606 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3607 3608 vrele(vp); 3609 return (error); 3610 } 3611 3612 /* 3613 * Common routine to set ownership given a vnode. 3614 */ 3615 static int 3616 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3617 int posix_semantics) 3618 { 3619 struct vattr vattr; 3620 mode_t newmode; 3621 int error; 3622 3623 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3624 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3625 goto out; 3626 3627 #define CHANGED(x) ((int)(x) != -1) 3628 newmode = vattr.va_mode; 3629 if (posix_semantics) { 3630 /* 3631 * POSIX/XPG semantics: if the caller is not the super-user, 3632 * clear set-user-id and set-group-id bits. Both POSIX and 3633 * the XPG consider the behaviour for calls by the super-user 3634 * implementation-defined; we leave the set-user-id and set- 3635 * group-id settings intact in that case. 3636 */ 3637 if (vattr.va_mode & S_ISUID) { 3638 if (kauth_authorize_vnode(l->l_cred, 3639 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3640 newmode &= ~S_ISUID; 3641 } 3642 if (vattr.va_mode & S_ISGID) { 3643 if (kauth_authorize_vnode(l->l_cred, 3644 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3645 newmode &= ~S_ISGID; 3646 } 3647 } else { 3648 /* 3649 * NetBSD semantics: when changing owner and/or group, 3650 * clear the respective bit(s). 3651 */ 3652 if (CHANGED(uid)) 3653 newmode &= ~S_ISUID; 3654 if (CHANGED(gid)) 3655 newmode &= ~S_ISGID; 3656 } 3657 /* Update va_mode iff altered. */ 3658 if (vattr.va_mode == newmode) 3659 newmode = VNOVAL; 3660 3661 vattr_null(&vattr); 3662 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3663 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3664 vattr.va_mode = newmode; 3665 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3666 #undef CHANGED 3667 3668 out: 3669 VOP_UNLOCK(vp); 3670 return (error); 3671 } 3672 3673 /* 3674 * Set the access and modification times given a path name; this 3675 * version follows links. 3676 */ 3677 /* ARGSUSED */ 3678 int 3679 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3680 register_t *retval) 3681 { 3682 /* { 3683 syscallarg(const char *) path; 3684 syscallarg(const struct timeval *) tptr; 3685 } */ 3686 3687 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3688 SCARG(uap, tptr), UIO_USERSPACE); 3689 } 3690 3691 /* 3692 * Set the access and modification times given a file descriptor. 3693 */ 3694 /* ARGSUSED */ 3695 int 3696 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3697 register_t *retval) 3698 { 3699 /* { 3700 syscallarg(int) fd; 3701 syscallarg(const struct timeval *) tptr; 3702 } */ 3703 int error; 3704 file_t *fp; 3705 3706 /* fd_getvnode() will use the descriptor for us */ 3707 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3708 return (error); 3709 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3710 UIO_USERSPACE); 3711 fd_putfile(SCARG(uap, fd)); 3712 return (error); 3713 } 3714 3715 int 3716 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3717 register_t *retval) 3718 { 3719 /* { 3720 syscallarg(int) fd; 3721 syscallarg(const struct timespec *) tptr; 3722 } */ 3723 int error; 3724 file_t *fp; 3725 3726 /* fd_getvnode() will use the descriptor for us */ 3727 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3728 return (error); 3729 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0, 3730 SCARG(uap, tptr), UIO_USERSPACE); 3731 fd_putfile(SCARG(uap, fd)); 3732 return (error); 3733 } 3734 3735 /* 3736 * Set the access and modification times given a path name; this 3737 * version does not follow links. 3738 */ 3739 int 3740 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3741 register_t *retval) 3742 { 3743 /* { 3744 syscallarg(const char *) path; 3745 syscallarg(const struct timeval *) tptr; 3746 } */ 3747 3748 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3749 SCARG(uap, tptr), UIO_USERSPACE); 3750 } 3751 3752 int 3753 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3754 register_t *retval) 3755 { 3756 /* { 3757 syscallarg(int) fd; 3758 syscallarg(const char *) path; 3759 syscallarg(const struct timespec *) tptr; 3760 syscallarg(int) flag; 3761 } */ 3762 int follow; 3763 const struct timespec *tptr; 3764 int error; 3765 3766 tptr = SCARG(uap, tptr); 3767 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3768 3769 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3770 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3771 3772 return error; 3773 } 3774 3775 /* 3776 * Common routine to set access and modification times given a vnode. 3777 */ 3778 int 3779 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3780 const struct timespec *tptr, enum uio_seg seg) 3781 { 3782 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3783 } 3784 3785 int 3786 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3787 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3788 { 3789 struct vattr vattr; 3790 int error, dorele = 0; 3791 namei_simple_flags_t sflags; 3792 bool vanull, setbirthtime; 3793 struct timespec ts[2]; 3794 3795 KASSERT(l != NULL || fdat == AT_FDCWD); 3796 3797 /* 3798 * I have checked all callers and they pass either FOLLOW, 3799 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3800 * is 0. More to the point, they don't pass anything else. 3801 * Let's keep it that way at least until the namei interfaces 3802 * are fully sanitized. 3803 */ 3804 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3805 sflags = (flag == FOLLOW) ? 3806 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3807 3808 if (tptr == NULL) { 3809 vanull = true; 3810 nanotime(&ts[0]); 3811 ts[1] = ts[0]; 3812 } else { 3813 vanull = false; 3814 if (seg != UIO_SYSSPACE) { 3815 error = copyin(tptr, ts, sizeof (ts)); 3816 if (error != 0) 3817 return error; 3818 } else { 3819 ts[0] = tptr[0]; 3820 ts[1] = tptr[1]; 3821 } 3822 } 3823 3824 if (ts[0].tv_nsec == UTIME_NOW) { 3825 nanotime(&ts[0]); 3826 if (ts[1].tv_nsec == UTIME_NOW) { 3827 vanull = true; 3828 ts[1] = ts[0]; 3829 } 3830 } else if (ts[1].tv_nsec == UTIME_NOW) 3831 nanotime(&ts[1]); 3832 3833 if (vp == NULL) { 3834 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3835 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3836 if (error != 0) 3837 return error; 3838 dorele = 1; 3839 } 3840 3841 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3842 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3843 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3844 vattr_null(&vattr); 3845 3846 if (ts[0].tv_nsec != UTIME_OMIT) 3847 vattr.va_atime = ts[0]; 3848 3849 if (ts[1].tv_nsec != UTIME_OMIT) { 3850 vattr.va_mtime = ts[1]; 3851 if (setbirthtime) 3852 vattr.va_birthtime = ts[1]; 3853 } 3854 3855 if (vanull) 3856 vattr.va_vaflags |= VA_UTIMES_NULL; 3857 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3858 VOP_UNLOCK(vp); 3859 3860 if (dorele != 0) 3861 vrele(vp); 3862 3863 return error; 3864 } 3865 3866 int 3867 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3868 const struct timeval *tptr, enum uio_seg seg) 3869 { 3870 struct timespec ts[2]; 3871 struct timespec *tsptr = NULL; 3872 int error; 3873 3874 if (tptr != NULL) { 3875 struct timeval tv[2]; 3876 3877 if (seg != UIO_SYSSPACE) { 3878 error = copyin(tptr, tv, sizeof (tv)); 3879 if (error != 0) 3880 return error; 3881 tptr = tv; 3882 } 3883 3884 if ((tv[0].tv_usec == UTIME_NOW) || 3885 (tv[0].tv_usec == UTIME_OMIT)) 3886 ts[0].tv_nsec = tv[0].tv_usec; 3887 else 3888 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3889 3890 if ((tv[1].tv_usec == UTIME_NOW) || 3891 (tv[1].tv_usec == UTIME_OMIT)) 3892 ts[1].tv_nsec = tv[1].tv_usec; 3893 else 3894 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3895 3896 tsptr = &ts[0]; 3897 } 3898 3899 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3900 } 3901 3902 /* 3903 * Truncate a file given its path name. 3904 */ 3905 /* ARGSUSED */ 3906 int 3907 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3908 { 3909 /* { 3910 syscallarg(const char *) path; 3911 syscallarg(int) pad; 3912 syscallarg(off_t) length; 3913 } */ 3914 struct vnode *vp; 3915 struct vattr vattr; 3916 int error; 3917 3918 error = namei_simple_user(SCARG(uap, path), 3919 NSM_FOLLOW_TRYEMULROOT, &vp); 3920 if (error != 0) 3921 return (error); 3922 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3923 if (vp->v_type == VDIR) 3924 error = EISDIR; 3925 else if ((error = vn_writechk(vp)) == 0 && 3926 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3927 vattr_null(&vattr); 3928 vattr.va_size = SCARG(uap, length); 3929 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3930 } 3931 vput(vp); 3932 return (error); 3933 } 3934 3935 /* 3936 * Truncate a file given a file descriptor. 3937 */ 3938 /* ARGSUSED */ 3939 int 3940 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3941 { 3942 /* { 3943 syscallarg(int) fd; 3944 syscallarg(int) pad; 3945 syscallarg(off_t) length; 3946 } */ 3947 struct vattr vattr; 3948 struct vnode *vp; 3949 file_t *fp; 3950 int error; 3951 3952 /* fd_getvnode() will use the descriptor for us */ 3953 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3954 return (error); 3955 if ((fp->f_flag & FWRITE) == 0) { 3956 error = EINVAL; 3957 goto out; 3958 } 3959 vp = fp->f_data; 3960 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3961 if (vp->v_type == VDIR) 3962 error = EISDIR; 3963 else if ((error = vn_writechk(vp)) == 0) { 3964 vattr_null(&vattr); 3965 vattr.va_size = SCARG(uap, length); 3966 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3967 } 3968 VOP_UNLOCK(vp); 3969 out: 3970 fd_putfile(SCARG(uap, fd)); 3971 return (error); 3972 } 3973 3974 /* 3975 * Sync an open file. 3976 */ 3977 /* ARGSUSED */ 3978 int 3979 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3980 { 3981 /* { 3982 syscallarg(int) fd; 3983 } */ 3984 struct vnode *vp; 3985 file_t *fp; 3986 int error; 3987 3988 /* fd_getvnode() will use the descriptor for us */ 3989 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3990 return (error); 3991 vp = fp->f_data; 3992 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3993 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3994 VOP_UNLOCK(vp); 3995 fd_putfile(SCARG(uap, fd)); 3996 return (error); 3997 } 3998 3999 /* 4000 * Sync a range of file data. API modeled after that found in AIX. 4001 * 4002 * FDATASYNC indicates that we need only save enough metadata to be able 4003 * to re-read the written data. Note we duplicate AIX's requirement that 4004 * the file be open for writing. 4005 */ 4006 /* ARGSUSED */ 4007 int 4008 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4009 { 4010 /* { 4011 syscallarg(int) fd; 4012 syscallarg(int) flags; 4013 syscallarg(off_t) start; 4014 syscallarg(off_t) length; 4015 } */ 4016 struct vnode *vp; 4017 file_t *fp; 4018 int flags, nflags; 4019 off_t s, e, len; 4020 int error; 4021 4022 /* fd_getvnode() will use the descriptor for us */ 4023 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4024 return (error); 4025 4026 if ((fp->f_flag & FWRITE) == 0) { 4027 error = EBADF; 4028 goto out; 4029 } 4030 4031 flags = SCARG(uap, flags); 4032 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4033 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4034 error = EINVAL; 4035 goto out; 4036 } 4037 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4038 if (flags & FDATASYNC) 4039 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4040 else 4041 nflags = FSYNC_WAIT; 4042 if (flags & FDISKSYNC) 4043 nflags |= FSYNC_CACHE; 4044 4045 len = SCARG(uap, length); 4046 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4047 if (len) { 4048 s = SCARG(uap, start); 4049 e = s + len; 4050 if (e < s) { 4051 error = EINVAL; 4052 goto out; 4053 } 4054 } else { 4055 e = 0; 4056 s = 0; 4057 } 4058 4059 vp = fp->f_data; 4060 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4061 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4062 VOP_UNLOCK(vp); 4063 out: 4064 fd_putfile(SCARG(uap, fd)); 4065 return (error); 4066 } 4067 4068 /* 4069 * Sync the data of an open file. 4070 */ 4071 /* ARGSUSED */ 4072 int 4073 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4074 { 4075 /* { 4076 syscallarg(int) fd; 4077 } */ 4078 struct vnode *vp; 4079 file_t *fp; 4080 int error; 4081 4082 /* fd_getvnode() will use the descriptor for us */ 4083 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4084 return (error); 4085 if ((fp->f_flag & FWRITE) == 0) { 4086 fd_putfile(SCARG(uap, fd)); 4087 return (EBADF); 4088 } 4089 vp = fp->f_data; 4090 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4091 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4092 VOP_UNLOCK(vp); 4093 fd_putfile(SCARG(uap, fd)); 4094 return (error); 4095 } 4096 4097 /* 4098 * Rename files, (standard) BSD semantics frontend. 4099 */ 4100 /* ARGSUSED */ 4101 int 4102 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4103 { 4104 /* { 4105 syscallarg(const char *) from; 4106 syscallarg(const char *) to; 4107 } */ 4108 4109 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4110 SCARG(uap, to), UIO_USERSPACE, 0)); 4111 } 4112 4113 int 4114 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4115 register_t *retval) 4116 { 4117 /* { 4118 syscallarg(int) fromfd; 4119 syscallarg(const char *) from; 4120 syscallarg(int) tofd; 4121 syscallarg(const char *) to; 4122 } */ 4123 4124 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4125 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4126 } 4127 4128 /* 4129 * Rename files, POSIX semantics frontend. 4130 */ 4131 /* ARGSUSED */ 4132 int 4133 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4134 { 4135 /* { 4136 syscallarg(const char *) from; 4137 syscallarg(const char *) to; 4138 } */ 4139 4140 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4141 SCARG(uap, to), UIO_USERSPACE, 1)); 4142 } 4143 4144 /* 4145 * Rename files. Source and destination must either both be directories, 4146 * or both not be directories. If target is a directory, it must be empty. 4147 * If `from' and `to' refer to the same object, the value of the `retain' 4148 * argument is used to determine whether `from' will be 4149 * 4150 * (retain == 0) deleted unless `from' and `to' refer to the same 4151 * object in the file system's name space (BSD). 4152 * (retain == 1) always retained (POSIX). 4153 * 4154 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4155 */ 4156 int 4157 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4158 { 4159 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4160 } 4161 4162 static int 4163 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4164 const char *to, enum uio_seg seg, int retain) 4165 { 4166 struct pathbuf *fpb, *tpb; 4167 struct nameidata fnd, tnd; 4168 struct vnode *fdvp, *fvp; 4169 struct vnode *tdvp, *tvp; 4170 struct mount *mp, *tmp; 4171 int error; 4172 4173 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4174 4175 error = pathbuf_maybe_copyin(from, seg, &fpb); 4176 if (error) 4177 goto out0; 4178 KASSERT(fpb != NULL); 4179 4180 error = pathbuf_maybe_copyin(to, seg, &tpb); 4181 if (error) 4182 goto out1; 4183 KASSERT(tpb != NULL); 4184 4185 /* 4186 * Lookup from. 4187 * 4188 * XXX LOCKPARENT is wrong because we don't actually want it 4189 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4190 * insane, so for the time being we need to leave it like this. 4191 */ 4192 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4193 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4194 goto out2; 4195 4196 /* 4197 * Pull out the important results of the lookup, fdvp and fvp. 4198 * Of course, fvp is bogus because we're about to unlock fdvp. 4199 */ 4200 fdvp = fnd.ni_dvp; 4201 fvp = fnd.ni_vp; 4202 KASSERT(fdvp != NULL); 4203 KASSERT(fvp != NULL); 4204 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4205 4206 /* 4207 * Make sure neither fdvp nor fvp is locked. 4208 */ 4209 if (fdvp != fvp) 4210 VOP_UNLOCK(fdvp); 4211 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4212 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4213 4214 /* 4215 * Reject renaming `.' and `..'. Can't do this until after 4216 * namei because we need namei's parsing to find the final 4217 * component name. (namei should just leave us with the final 4218 * component name and not look it up itself, but anyway...) 4219 * 4220 * This was here before because we used to relookup from 4221 * instead of to and relookup requires the caller to check 4222 * this, but now file systems may depend on this check, so we 4223 * must retain it until the file systems are all rototilled. 4224 */ 4225 if (((fnd.ni_cnd.cn_namelen == 1) && 4226 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4227 ((fnd.ni_cnd.cn_namelen == 2) && 4228 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4229 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4230 error = EINVAL; /* XXX EISDIR? */ 4231 goto abort0; 4232 } 4233 4234 /* 4235 * Lookup to. 4236 * 4237 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4238 * fvp here to decide whether to add CREATEDIR is a load of 4239 * bollocks because fvp might be the wrong node by now, since 4240 * fdvp is unlocked. 4241 * 4242 * XXX Why not pass CREATEDIR always? 4243 */ 4244 NDINIT(&tnd, RENAME, 4245 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4246 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4247 tpb); 4248 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4249 goto abort0; 4250 4251 /* 4252 * Pull out the important results of the lookup, tdvp and tvp. 4253 * Of course, tvp is bogus because we're about to unlock tdvp. 4254 */ 4255 tdvp = tnd.ni_dvp; 4256 tvp = tnd.ni_vp; 4257 KASSERT(tdvp != NULL); 4258 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4259 4260 /* 4261 * Make sure neither tdvp nor tvp is locked. 4262 */ 4263 if (tdvp != tvp) 4264 VOP_UNLOCK(tdvp); 4265 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4266 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4267 4268 /* 4269 * Reject renaming onto `.' or `..'. relookup is unhappy with 4270 * these, which is why we must do this here. Once upon a time 4271 * we relooked up from instead of to, and consequently didn't 4272 * need this check, but now that we relookup to instead of 4273 * from, we need this; and we shall need it forever forward 4274 * until the VOP_RENAME protocol changes, because file systems 4275 * will no doubt begin to depend on this check. 4276 */ 4277 if (((tnd.ni_cnd.cn_namelen == 1) && 4278 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4279 ((tnd.ni_cnd.cn_namelen == 2) && 4280 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4281 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4282 error = EINVAL; /* XXX EISDIR? */ 4283 goto abort1; 4284 } 4285 4286 /* 4287 * Get the mount point. If the file system has been unmounted, 4288 * which it may be because we're not holding any vnode locks, 4289 * then v_mount will be NULL. We're not really supposed to 4290 * read v_mount without holding the vnode lock, but since we 4291 * have fdvp referenced, if fdvp->v_mount changes then at worst 4292 * it will be set to NULL, not changed to another mount point. 4293 * And, of course, since it is up to the file system to 4294 * determine the real lock order, we can't lock both fdvp and 4295 * tdvp at the same time. 4296 */ 4297 mp = fdvp->v_mount; 4298 if (mp == NULL) { 4299 error = ENOENT; 4300 goto abort1; 4301 } 4302 4303 /* 4304 * Make sure the mount points match. Again, although we don't 4305 * hold any vnode locks, the v_mount fields may change -- but 4306 * at worst they will change to NULL, so this will never become 4307 * a cross-device rename, because we hold vnode references. 4308 * 4309 * XXX Because nothing is locked and the compiler may reorder 4310 * things here, unmounting the file system at an inopportune 4311 * moment may cause rename to fail with ENXDEV when it really 4312 * should fail with ENOENT. 4313 */ 4314 tmp = tdvp->v_mount; 4315 if (tmp == NULL) { 4316 error = ENOENT; 4317 goto abort1; 4318 } 4319 4320 if (mp != tmp) { 4321 error = EXDEV; 4322 goto abort1; 4323 } 4324 4325 /* 4326 * Take the vfs rename lock to avoid cross-directory screw cases. 4327 * Nothing is locked currently, so taking this lock is safe. 4328 */ 4329 error = VFS_RENAMELOCK_ENTER(mp); 4330 if (error) 4331 goto abort1; 4332 4333 /* 4334 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4335 * and nothing is locked except for the vfs rename lock. 4336 * 4337 * The next step is a little rain dance to conform to the 4338 * insane lock protocol, even though it does nothing to ward 4339 * off race conditions. 4340 * 4341 * We need tdvp and tvp to be locked. However, because we have 4342 * unlocked tdvp in order to hold no locks while we take the 4343 * vfs rename lock, tvp may be wrong here, and we can't safely 4344 * lock it even if the sensible file systems will just unlock 4345 * it straight away. Consequently, we must lock tdvp and then 4346 * relookup tvp to get it locked. 4347 * 4348 * Finally, because the VOP_RENAME protocol is brain-damaged 4349 * and various file systems insanely depend on the semantics of 4350 * this brain damage, the lookup of to must be the last lookup 4351 * before VOP_RENAME. 4352 */ 4353 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4354 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4355 if (error) 4356 goto abort2; 4357 4358 /* 4359 * Drop the old tvp and pick up the new one -- which might be 4360 * the same, but that doesn't matter to us. After this, tdvp 4361 * and tvp should both be locked. 4362 */ 4363 if (tvp != NULL) 4364 vrele(tvp); 4365 tvp = tnd.ni_vp; 4366 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4367 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4368 4369 /* 4370 * The old do_sys_rename had various consistency checks here 4371 * involving fvp and tvp. fvp is bogus already here, and tvp 4372 * will become bogus soon in any sensible file system, so the 4373 * only purpose in putting these checks here is to give lip 4374 * service to these screw cases and to acknowledge that they 4375 * exist, not actually to handle them, but here you go 4376 * anyway... 4377 */ 4378 4379 /* 4380 * Acknowledge that directories and non-directories aren't 4381 * suposed to mix. 4382 */ 4383 if (tvp != NULL) { 4384 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4385 error = ENOTDIR; 4386 goto abort3; 4387 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4388 error = EISDIR; 4389 goto abort3; 4390 } 4391 } 4392 4393 /* 4394 * Acknowledge some random screw case, among the dozens that 4395 * might arise. 4396 */ 4397 if (fvp == tdvp) { 4398 error = EINVAL; 4399 goto abort3; 4400 } 4401 4402 /* 4403 * Acknowledge that POSIX has a wacky screw case. 4404 * 4405 * XXX Eventually the retain flag needs to be passed on to 4406 * VOP_RENAME. 4407 */ 4408 if (fvp == tvp) { 4409 if (retain) { 4410 error = 0; 4411 goto abort3; 4412 } else if ((fdvp == tdvp) && 4413 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4414 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4415 fnd.ni_cnd.cn_namelen))) { 4416 error = 0; 4417 goto abort3; 4418 } 4419 } 4420 4421 /* 4422 * Make sure veriexec can screw us up. (But a race can screw 4423 * up veriexec, of course -- remember, fvp and (soon) tvp are 4424 * bogus.) 4425 */ 4426 #if NVERIEXEC > 0 4427 { 4428 char *f1, *f2; 4429 size_t f1_len; 4430 size_t f2_len; 4431 4432 f1_len = fnd.ni_cnd.cn_namelen + 1; 4433 f1 = kmem_alloc(f1_len, KM_SLEEP); 4434 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4435 4436 f2_len = tnd.ni_cnd.cn_namelen + 1; 4437 f2 = kmem_alloc(f2_len, KM_SLEEP); 4438 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4439 4440 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4441 4442 kmem_free(f1, f1_len); 4443 kmem_free(f2, f2_len); 4444 4445 if (error) 4446 goto abort3; 4447 } 4448 #endif /* NVERIEXEC > 0 */ 4449 4450 /* 4451 * All ready. Incant the rename vop. 4452 */ 4453 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4454 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4455 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4456 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4457 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4458 4459 /* 4460 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4461 * tdvp and tvp. But we can't assert any of that. 4462 */ 4463 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4464 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4465 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4466 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4467 4468 /* 4469 * So all we have left to do is to drop the rename lock and 4470 * destroy the pathbufs. 4471 */ 4472 VFS_RENAMELOCK_EXIT(mp); 4473 goto out2; 4474 4475 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4476 VOP_UNLOCK(tvp); 4477 abort2: VOP_UNLOCK(tdvp); 4478 VFS_RENAMELOCK_EXIT(mp); 4479 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4480 vrele(tdvp); 4481 if (tvp != NULL) 4482 vrele(tvp); 4483 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4484 vrele(fdvp); 4485 vrele(fvp); 4486 out2: pathbuf_destroy(tpb); 4487 out1: pathbuf_destroy(fpb); 4488 out0: return error; 4489 } 4490 4491 /* 4492 * Make a directory file. 4493 */ 4494 /* ARGSUSED */ 4495 int 4496 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4497 { 4498 /* { 4499 syscallarg(const char *) path; 4500 syscallarg(int) mode; 4501 } */ 4502 4503 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4504 SCARG(uap, mode), UIO_USERSPACE); 4505 } 4506 4507 int 4508 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4509 register_t *retval) 4510 { 4511 /* { 4512 syscallarg(int) fd; 4513 syscallarg(const char *) path; 4514 syscallarg(int) mode; 4515 } */ 4516 4517 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4518 SCARG(uap, mode), UIO_USERSPACE); 4519 } 4520 4521 4522 int 4523 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4524 { 4525 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4526 } 4527 4528 static int 4529 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4530 enum uio_seg seg) 4531 { 4532 struct proc *p = curlwp->l_proc; 4533 struct vnode *vp; 4534 struct vattr vattr; 4535 int error; 4536 struct pathbuf *pb; 4537 struct nameidata nd; 4538 4539 KASSERT(l != NULL || fdat == AT_FDCWD); 4540 4541 /* XXX bollocks, should pass in a pathbuf */ 4542 error = pathbuf_maybe_copyin(path, seg, &pb); 4543 if (error) { 4544 return error; 4545 } 4546 4547 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4548 4549 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4550 pathbuf_destroy(pb); 4551 return (error); 4552 } 4553 vp = nd.ni_vp; 4554 if (vp != NULL) { 4555 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4556 if (nd.ni_dvp == vp) 4557 vrele(nd.ni_dvp); 4558 else 4559 vput(nd.ni_dvp); 4560 vrele(vp); 4561 pathbuf_destroy(pb); 4562 return (EEXIST); 4563 } 4564 vattr_null(&vattr); 4565 vattr.va_type = VDIR; 4566 /* We will read cwdi->cwdi_cmask unlocked. */ 4567 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4568 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4569 if (!error) 4570 vrele(nd.ni_vp); 4571 vput(nd.ni_dvp); 4572 pathbuf_destroy(pb); 4573 return (error); 4574 } 4575 4576 /* 4577 * Remove a directory file. 4578 */ 4579 /* ARGSUSED */ 4580 int 4581 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4582 { 4583 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4584 AT_REMOVEDIR, UIO_USERSPACE); 4585 } 4586 4587 /* 4588 * Read a block of directory entries in a file system independent format. 4589 */ 4590 int 4591 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4592 { 4593 /* { 4594 syscallarg(int) fd; 4595 syscallarg(char *) buf; 4596 syscallarg(size_t) count; 4597 } */ 4598 file_t *fp; 4599 int error, done; 4600 4601 /* fd_getvnode() will use the descriptor for us */ 4602 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4603 return (error); 4604 if ((fp->f_flag & FREAD) == 0) { 4605 error = EBADF; 4606 goto out; 4607 } 4608 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4609 SCARG(uap, count), &done, l, 0, 0); 4610 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4611 *retval = done; 4612 out: 4613 fd_putfile(SCARG(uap, fd)); 4614 return (error); 4615 } 4616 4617 /* 4618 * Set the mode mask for creation of filesystem nodes. 4619 */ 4620 int 4621 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4622 { 4623 /* { 4624 syscallarg(mode_t) newmask; 4625 } */ 4626 struct proc *p = l->l_proc; 4627 struct cwdinfo *cwdi; 4628 4629 /* 4630 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4631 * important is that we serialize changes to the mask. The 4632 * rw_exit() will issue a write memory barrier on our behalf, 4633 * and force the changes out to other CPUs (as it must use an 4634 * atomic operation, draining the local CPU's store buffers). 4635 */ 4636 cwdi = p->p_cwdi; 4637 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4638 *retval = cwdi->cwdi_cmask; 4639 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4640 rw_exit(&cwdi->cwdi_lock); 4641 4642 return (0); 4643 } 4644 4645 int 4646 dorevoke(struct vnode *vp, kauth_cred_t cred) 4647 { 4648 struct vattr vattr; 4649 int error, fs_decision; 4650 4651 vn_lock(vp, LK_SHARED | LK_RETRY); 4652 error = VOP_GETATTR(vp, &vattr, cred); 4653 VOP_UNLOCK(vp); 4654 if (error != 0) 4655 return error; 4656 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4657 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4658 fs_decision); 4659 if (!error) 4660 VOP_REVOKE(vp, REVOKEALL); 4661 return (error); 4662 } 4663 4664 /* 4665 * Void all references to file by ripping underlying filesystem 4666 * away from vnode. 4667 */ 4668 /* ARGSUSED */ 4669 int 4670 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4671 { 4672 /* { 4673 syscallarg(const char *) path; 4674 } */ 4675 struct vnode *vp; 4676 int error; 4677 4678 error = namei_simple_user(SCARG(uap, path), 4679 NSM_FOLLOW_TRYEMULROOT, &vp); 4680 if (error != 0) 4681 return (error); 4682 error = dorevoke(vp, l->l_cred); 4683 vrele(vp); 4684 return (error); 4685 } 4686