1 /* $NetBSD: vfs_syscalls.c,v 1.469 2013/11/18 01:31:42 chs Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.469 2013/11/18 01:31:42 chs Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 static int change_flags(struct vnode *, u_long, struct lwp *); 120 static int change_mode(struct vnode *, int, struct lwp *l); 121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122 static int do_open(lwp_t *, struct vnode *, struct pathbuf *, int, int, int *); 123 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 124 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 125 enum uio_seg); 126 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 127 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 128 enum uio_seg); 129 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 130 enum uio_seg, int); 131 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 132 size_t, register_t *); 133 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 134 135 static int fd_nameiat(struct lwp *, int, struct nameidata *); 136 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 137 namei_simple_flags_t, struct vnode **); 138 139 140 /* 141 * This table is used to maintain compatibility with 4.3BSD 142 * and NetBSD 0.9 mount syscalls - and possibly other systems. 143 * Note, the order is important! 144 * 145 * Do not modify this table. It should only contain filesystems 146 * supported by NetBSD 0.9 and 4.3BSD. 147 */ 148 const char * const mountcompatnames[] = { 149 NULL, /* 0 = MOUNT_NONE */ 150 MOUNT_FFS, /* 1 = MOUNT_UFS */ 151 MOUNT_NFS, /* 2 */ 152 MOUNT_MFS, /* 3 */ 153 MOUNT_MSDOS, /* 4 */ 154 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 155 MOUNT_FDESC, /* 6 */ 156 MOUNT_KERNFS, /* 7 */ 157 NULL, /* 8 = MOUNT_DEVFS */ 158 MOUNT_AFS, /* 9 */ 159 }; 160 161 const int nmountcompatnames = __arraycount(mountcompatnames); 162 163 static int 164 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 165 { 166 file_t *dfp; 167 int error; 168 169 if (fdat != AT_FDCWD) { 170 if ((error = fd_getvnode(fdat, &dfp)) != 0) 171 goto out; 172 173 NDAT(ndp, dfp->f_data); 174 } 175 176 error = namei(ndp); 177 178 if (fdat != AT_FDCWD) 179 fd_putfile(fdat); 180 out: 181 return error; 182 } 183 184 static int 185 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 186 namei_simple_flags_t sflags, struct vnode **vp_ret) 187 { 188 file_t *dfp; 189 struct vnode *dvp; 190 int error; 191 192 if (fdat != AT_FDCWD) { 193 if ((error = fd_getvnode(fdat, &dfp)) != 0) 194 goto out; 195 196 dvp = dfp->f_data; 197 } else { 198 dvp = NULL; 199 } 200 201 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 202 203 if (fdat != AT_FDCWD) 204 fd_putfile(fdat); 205 out: 206 return error; 207 } 208 209 static int 210 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 211 { 212 int error; 213 214 fp->f_flag = flags & FMASK; 215 fp->f_type = DTYPE_VNODE; 216 fp->f_ops = &vnops; 217 fp->f_data = vp; 218 219 if (flags & (O_EXLOCK | O_SHLOCK)) { 220 struct flock lf; 221 int type; 222 223 lf.l_whence = SEEK_SET; 224 lf.l_start = 0; 225 lf.l_len = 0; 226 if (flags & O_EXLOCK) 227 lf.l_type = F_WRLCK; 228 else 229 lf.l_type = F_RDLCK; 230 type = F_FLOCK; 231 if ((flags & FNONBLOCK) == 0) 232 type |= F_WAIT; 233 VOP_UNLOCK(vp); 234 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 235 if (error) { 236 (void) vn_close(vp, fp->f_flag, fp->f_cred); 237 fd_abort(l->l_proc, fp, indx); 238 return error; 239 } 240 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 241 atomic_or_uint(&fp->f_flag, FHASLOCK); 242 } 243 if (flags & O_CLOEXEC) 244 fd_set_exclose(l, indx, true); 245 return 0; 246 } 247 248 static int 249 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 250 void *data, size_t *data_len) 251 { 252 struct mount *mp; 253 int error = 0, saved_flags; 254 255 mp = vp->v_mount; 256 saved_flags = mp->mnt_flag; 257 258 /* We can operate only on VV_ROOT nodes. */ 259 if ((vp->v_vflag & VV_ROOT) == 0) { 260 error = EINVAL; 261 goto out; 262 } 263 264 /* 265 * We only allow the filesystem to be reloaded if it 266 * is currently mounted read-only. Additionally, we 267 * prevent read-write to read-only downgrades. 268 */ 269 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 270 (mp->mnt_flag & MNT_RDONLY) == 0 && 271 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 272 error = EOPNOTSUPP; /* Needs translation */ 273 goto out; 274 } 275 276 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 277 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 278 if (error) 279 goto out; 280 281 if (vfs_busy(mp, NULL)) { 282 error = EPERM; 283 goto out; 284 } 285 286 mutex_enter(&mp->mnt_updating); 287 288 mp->mnt_flag &= ~MNT_OP_FLAGS; 289 mp->mnt_flag |= flags & MNT_OP_FLAGS; 290 291 /* 292 * Set the mount level flags. 293 */ 294 if (flags & MNT_RDONLY) 295 mp->mnt_flag |= MNT_RDONLY; 296 else if (mp->mnt_flag & MNT_RDONLY) 297 mp->mnt_iflag |= IMNT_WANTRDWR; 298 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 299 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 300 error = VFS_MOUNT(mp, path, data, data_len); 301 302 if (error && data != NULL) { 303 int error2; 304 305 /* 306 * Update failed; let's try and see if it was an 307 * export request. For compat with 3.0 and earlier. 308 */ 309 error2 = vfs_hooks_reexport(mp, path, data); 310 311 /* 312 * Only update error code if the export request was 313 * understood but some problem occurred while 314 * processing it. 315 */ 316 if (error2 != EJUSTRETURN) 317 error = error2; 318 } 319 320 if (mp->mnt_iflag & IMNT_WANTRDWR) 321 mp->mnt_flag &= ~MNT_RDONLY; 322 if (error) 323 mp->mnt_flag = saved_flags; 324 mp->mnt_flag &= ~MNT_OP_FLAGS; 325 mp->mnt_iflag &= ~IMNT_WANTRDWR; 326 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 327 if (mp->mnt_syncer == NULL) 328 error = vfs_allocate_syncvnode(mp); 329 } else { 330 if (mp->mnt_syncer != NULL) 331 vfs_deallocate_syncvnode(mp); 332 } 333 mutex_exit(&mp->mnt_updating); 334 vfs_unbusy(mp, false, NULL); 335 336 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 337 (flags & MNT_EXTATTR)) { 338 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 339 NULL, 0, NULL) != 0) { 340 printf("%s: failed to start extattr, error = %d", 341 mp->mnt_stat.f_mntonname, error); 342 mp->mnt_flag &= ~MNT_EXTATTR; 343 } 344 } 345 346 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 347 !(flags & MNT_EXTATTR)) { 348 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 349 NULL, 0, NULL) != 0) { 350 printf("%s: failed to stop extattr, error = %d", 351 mp->mnt_stat.f_mntonname, error); 352 mp->mnt_flag |= MNT_RDONLY; 353 } 354 } 355 out: 356 return (error); 357 } 358 359 static int 360 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 361 { 362 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 363 int error; 364 365 /* Copy file-system type from userspace. */ 366 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 367 if (error) { 368 /* 369 * Historically, filesystem types were identified by numbers. 370 * If we get an integer for the filesystem type instead of a 371 * string, we check to see if it matches one of the historic 372 * filesystem types. 373 */ 374 u_long fsindex = (u_long)fstype; 375 if (fsindex >= nmountcompatnames || 376 mountcompatnames[fsindex] == NULL) 377 return ENODEV; 378 strlcpy(fstypename, mountcompatnames[fsindex], 379 sizeof(fstypename)); 380 } 381 382 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 383 if (strcmp(fstypename, "ufs") == 0) 384 fstypename[0] = 'f'; 385 386 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 387 return 0; 388 389 /* If we can autoload a vfs module, try again */ 390 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 391 392 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 393 return 0; 394 395 return ENODEV; 396 } 397 398 static int 399 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 400 void *data, size_t *data_len) 401 { 402 struct mount *mp; 403 int error; 404 405 /* If MNT_GETARGS is specified, it should be the only flag. */ 406 if (flags & ~MNT_GETARGS) 407 return EINVAL; 408 409 mp = vp->v_mount; 410 411 /* XXX: probably some notion of "can see" here if we want isolation. */ 412 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 413 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 414 if (error) 415 return error; 416 417 if ((vp->v_vflag & VV_ROOT) == 0) 418 return EINVAL; 419 420 if (vfs_busy(mp, NULL)) 421 return EPERM; 422 423 mutex_enter(&mp->mnt_updating); 424 mp->mnt_flag &= ~MNT_OP_FLAGS; 425 mp->mnt_flag |= MNT_GETARGS; 426 error = VFS_MOUNT(mp, path, data, data_len); 427 mp->mnt_flag &= ~MNT_OP_FLAGS; 428 mutex_exit(&mp->mnt_updating); 429 430 vfs_unbusy(mp, false, NULL); 431 return (error); 432 } 433 434 int 435 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 436 { 437 /* { 438 syscallarg(const char *) type; 439 syscallarg(const char *) path; 440 syscallarg(int) flags; 441 syscallarg(void *) data; 442 syscallarg(size_t) data_len; 443 } */ 444 445 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 446 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 447 SCARG(uap, data_len), retval); 448 } 449 450 int 451 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 452 const char *path, int flags, void *data, enum uio_seg data_seg, 453 size_t data_len, register_t *retval) 454 { 455 struct vnode *vp; 456 void *data_buf = data; 457 bool vfsopsrele = false; 458 int error; 459 460 /* XXX: The calling convention of this routine is totally bizarre */ 461 if (vfsops) 462 vfsopsrele = true; 463 464 /* 465 * Get vnode to be covered 466 */ 467 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 468 if (error != 0) { 469 vp = NULL; 470 goto done; 471 } 472 473 if (vfsops == NULL) { 474 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 475 vfsops = vp->v_mount->mnt_op; 476 } else { 477 /* 'type' is userspace */ 478 error = mount_get_vfsops(type, &vfsops); 479 if (error != 0) 480 goto done; 481 vfsopsrele = true; 482 } 483 } 484 485 if (data != NULL && data_seg == UIO_USERSPACE) { 486 if (data_len == 0) { 487 /* No length supplied, use default for filesystem */ 488 data_len = vfsops->vfs_min_mount_data; 489 if (data_len > VFS_MAX_MOUNT_DATA) { 490 error = EINVAL; 491 goto done; 492 } 493 /* 494 * Hopefully a longer buffer won't make copyin() fail. 495 * For compatibility with 3.0 and earlier. 496 */ 497 if (flags & MNT_UPDATE 498 && data_len < sizeof (struct mnt_export_args30)) 499 data_len = sizeof (struct mnt_export_args30); 500 } 501 data_buf = kmem_alloc(data_len, KM_SLEEP); 502 503 /* NFS needs the buffer even for mnt_getargs .... */ 504 error = copyin(data, data_buf, data_len); 505 if (error != 0) 506 goto done; 507 } 508 509 if (flags & MNT_GETARGS) { 510 if (data_len == 0) { 511 error = EINVAL; 512 goto done; 513 } 514 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 515 if (error != 0) 516 goto done; 517 if (data_seg == UIO_USERSPACE) 518 error = copyout(data_buf, data, data_len); 519 *retval = data_len; 520 } else if (flags & MNT_UPDATE) { 521 error = mount_update(l, vp, path, flags, data_buf, &data_len); 522 } else { 523 /* Locking is handled internally in mount_domount(). */ 524 KASSERT(vfsopsrele == true); 525 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 526 &data_len); 527 vfsopsrele = false; 528 } 529 530 done: 531 if (vfsopsrele) 532 vfs_delref(vfsops); 533 if (vp != NULL) { 534 vrele(vp); 535 } 536 if (data_buf != data) 537 kmem_free(data_buf, data_len); 538 return (error); 539 } 540 541 /* 542 * Unmount a file system. 543 * 544 * Note: unmount takes a path to the vnode mounted on as argument, 545 * not special file (as before). 546 */ 547 /* ARGSUSED */ 548 int 549 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 550 { 551 /* { 552 syscallarg(const char *) path; 553 syscallarg(int) flags; 554 } */ 555 struct vnode *vp; 556 struct mount *mp; 557 int error; 558 struct pathbuf *pb; 559 struct nameidata nd; 560 561 error = pathbuf_copyin(SCARG(uap, path), &pb); 562 if (error) { 563 return error; 564 } 565 566 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 567 if ((error = namei(&nd)) != 0) { 568 pathbuf_destroy(pb); 569 return error; 570 } 571 vp = nd.ni_vp; 572 pathbuf_destroy(pb); 573 574 mp = vp->v_mount; 575 atomic_inc_uint(&mp->mnt_refcnt); 576 VOP_UNLOCK(vp); 577 578 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 579 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 580 if (error) { 581 vrele(vp); 582 vfs_destroy(mp); 583 return (error); 584 } 585 586 /* 587 * Don't allow unmounting the root file system. 588 */ 589 if (mp->mnt_flag & MNT_ROOTFS) { 590 vrele(vp); 591 vfs_destroy(mp); 592 return (EINVAL); 593 } 594 595 /* 596 * Must be the root of the filesystem 597 */ 598 if ((vp->v_vflag & VV_ROOT) == 0) { 599 vrele(vp); 600 vfs_destroy(mp); 601 return (EINVAL); 602 } 603 604 vrele(vp); 605 error = dounmount(mp, SCARG(uap, flags), l); 606 vfs_destroy(mp); 607 return error; 608 } 609 610 /* 611 * Sync each mounted filesystem. 612 */ 613 #ifdef DEBUG 614 int syncprt = 0; 615 struct ctldebug debug0 = { "syncprt", &syncprt }; 616 #endif 617 618 void 619 do_sys_sync(struct lwp *l) 620 { 621 struct mount *mp, *nmp; 622 int asyncflag; 623 624 mutex_enter(&mountlist_lock); 625 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 626 mp = nmp) { 627 if (vfs_busy(mp, &nmp)) { 628 continue; 629 } 630 mutex_enter(&mp->mnt_updating); 631 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 632 asyncflag = mp->mnt_flag & MNT_ASYNC; 633 mp->mnt_flag &= ~MNT_ASYNC; 634 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 635 if (asyncflag) 636 mp->mnt_flag |= MNT_ASYNC; 637 } 638 mutex_exit(&mp->mnt_updating); 639 vfs_unbusy(mp, false, &nmp); 640 } 641 mutex_exit(&mountlist_lock); 642 #ifdef DEBUG 643 if (syncprt) 644 vfs_bufstats(); 645 #endif /* DEBUG */ 646 } 647 648 /* ARGSUSED */ 649 int 650 sys_sync(struct lwp *l, const void *v, register_t *retval) 651 { 652 do_sys_sync(l); 653 return (0); 654 } 655 656 657 /* 658 * Access or change filesystem quotas. 659 * 660 * (this is really 14 different calls bundled into one) 661 */ 662 663 static int 664 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 665 { 666 struct quotastat info_k; 667 int error; 668 669 /* ensure any padding bytes are cleared */ 670 memset(&info_k, 0, sizeof(info_k)); 671 672 error = vfs_quotactl_stat(mp, &info_k); 673 if (error) { 674 return error; 675 } 676 677 return copyout(&info_k, info_u, sizeof(info_k)); 678 } 679 680 static int 681 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 682 struct quotaidtypestat *info_u) 683 { 684 struct quotaidtypestat info_k; 685 int error; 686 687 /* ensure any padding bytes are cleared */ 688 memset(&info_k, 0, sizeof(info_k)); 689 690 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 691 if (error) { 692 return error; 693 } 694 695 return copyout(&info_k, info_u, sizeof(info_k)); 696 } 697 698 static int 699 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 700 struct quotaobjtypestat *info_u) 701 { 702 struct quotaobjtypestat info_k; 703 int error; 704 705 /* ensure any padding bytes are cleared */ 706 memset(&info_k, 0, sizeof(info_k)); 707 708 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 709 if (error) { 710 return error; 711 } 712 713 return copyout(&info_k, info_u, sizeof(info_k)); 714 } 715 716 static int 717 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 718 struct quotaval *val_u) 719 { 720 struct quotakey key_k; 721 struct quotaval val_k; 722 int error; 723 724 /* ensure any padding bytes are cleared */ 725 memset(&val_k, 0, sizeof(val_k)); 726 727 error = copyin(key_u, &key_k, sizeof(key_k)); 728 if (error) { 729 return error; 730 } 731 732 error = vfs_quotactl_get(mp, &key_k, &val_k); 733 if (error) { 734 return error; 735 } 736 737 return copyout(&val_k, val_u, sizeof(val_k)); 738 } 739 740 static int 741 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 742 const struct quotaval *val_u) 743 { 744 struct quotakey key_k; 745 struct quotaval val_k; 746 int error; 747 748 error = copyin(key_u, &key_k, sizeof(key_k)); 749 if (error) { 750 return error; 751 } 752 753 error = copyin(val_u, &val_k, sizeof(val_k)); 754 if (error) { 755 return error; 756 } 757 758 return vfs_quotactl_put(mp, &key_k, &val_k); 759 } 760 761 static int 762 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u) 763 { 764 struct quotakey key_k; 765 int error; 766 767 error = copyin(key_u, &key_k, sizeof(key_k)); 768 if (error) { 769 return error; 770 } 771 772 return vfs_quotactl_delete(mp, &key_k); 773 } 774 775 static int 776 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 777 { 778 struct quotakcursor cursor_k; 779 int error; 780 781 /* ensure any padding bytes are cleared */ 782 memset(&cursor_k, 0, sizeof(cursor_k)); 783 784 error = vfs_quotactl_cursoropen(mp, &cursor_k); 785 if (error) { 786 return error; 787 } 788 789 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 790 } 791 792 static int 793 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 794 { 795 struct quotakcursor cursor_k; 796 int error; 797 798 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 799 if (error) { 800 return error; 801 } 802 803 return vfs_quotactl_cursorclose(mp, &cursor_k); 804 } 805 806 static int 807 do_sys_quotactl_cursorskipidtype(struct mount *mp, 808 struct quotakcursor *cursor_u, int idtype) 809 { 810 struct quotakcursor cursor_k; 811 int error; 812 813 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 814 if (error) { 815 return error; 816 } 817 818 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 819 if (error) { 820 return error; 821 } 822 823 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 824 } 825 826 static int 827 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 828 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 829 unsigned *ret_u) 830 { 831 #define CGET_STACK_MAX 8 832 struct quotakcursor cursor_k; 833 struct quotakey stackkeys[CGET_STACK_MAX]; 834 struct quotaval stackvals[CGET_STACK_MAX]; 835 struct quotakey *keys_k; 836 struct quotaval *vals_k; 837 unsigned ret_k; 838 int error; 839 840 if (maxnum > 128) { 841 maxnum = 128; 842 } 843 844 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 845 if (error) { 846 return error; 847 } 848 849 if (maxnum <= CGET_STACK_MAX) { 850 keys_k = stackkeys; 851 vals_k = stackvals; 852 /* ensure any padding bytes are cleared */ 853 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 854 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 855 } else { 856 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 857 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 858 } 859 860 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 861 &ret_k); 862 if (error) { 863 goto fail; 864 } 865 866 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 867 if (error) { 868 goto fail; 869 } 870 871 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 872 if (error) { 873 goto fail; 874 } 875 876 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 877 if (error) { 878 goto fail; 879 } 880 881 /* do last to maximize the chance of being able to recover a failure */ 882 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 883 884 fail: 885 if (keys_k != stackkeys) { 886 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 887 } 888 if (vals_k != stackvals) { 889 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 890 } 891 return error; 892 } 893 894 static int 895 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 896 int *ret_u) 897 { 898 struct quotakcursor cursor_k; 899 int ret_k; 900 int error; 901 902 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 903 if (error) { 904 return error; 905 } 906 907 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 908 if (error) { 909 return error; 910 } 911 912 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 913 if (error) { 914 return error; 915 } 916 917 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 918 } 919 920 static int 921 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 922 { 923 struct quotakcursor cursor_k; 924 int error; 925 926 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 927 if (error) { 928 return error; 929 } 930 931 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 932 if (error) { 933 return error; 934 } 935 936 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 937 } 938 939 static int 940 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 941 { 942 char *path_k; 943 int error; 944 945 /* XXX this should probably be a struct pathbuf */ 946 path_k = PNBUF_GET(); 947 error = copyin(path_u, path_k, PATH_MAX); 948 if (error) { 949 PNBUF_PUT(path_k); 950 return error; 951 } 952 953 error = vfs_quotactl_quotaon(mp, idtype, path_k); 954 955 PNBUF_PUT(path_k); 956 return error; 957 } 958 959 static int 960 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 961 { 962 return vfs_quotactl_quotaoff(mp, idtype); 963 } 964 965 int 966 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 967 { 968 struct mount *mp; 969 struct vnode *vp; 970 int error; 971 972 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 973 if (error != 0) 974 return (error); 975 mp = vp->v_mount; 976 977 switch (args->qc_op) { 978 case QUOTACTL_STAT: 979 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 980 break; 981 case QUOTACTL_IDTYPESTAT: 982 error = do_sys_quotactl_idtypestat(mp, 983 args->u.idtypestat.qc_idtype, 984 args->u.idtypestat.qc_info); 985 break; 986 case QUOTACTL_OBJTYPESTAT: 987 error = do_sys_quotactl_objtypestat(mp, 988 args->u.objtypestat.qc_objtype, 989 args->u.objtypestat.qc_info); 990 break; 991 case QUOTACTL_GET: 992 error = do_sys_quotactl_get(mp, 993 args->u.get.qc_key, 994 args->u.get.qc_val); 995 break; 996 case QUOTACTL_PUT: 997 error = do_sys_quotactl_put(mp, 998 args->u.put.qc_key, 999 args->u.put.qc_val); 1000 break; 1001 case QUOTACTL_DELETE: 1002 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key); 1003 break; 1004 case QUOTACTL_CURSOROPEN: 1005 error = do_sys_quotactl_cursoropen(mp, 1006 args->u.cursoropen.qc_cursor); 1007 break; 1008 case QUOTACTL_CURSORCLOSE: 1009 error = do_sys_quotactl_cursorclose(mp, 1010 args->u.cursorclose.qc_cursor); 1011 break; 1012 case QUOTACTL_CURSORSKIPIDTYPE: 1013 error = do_sys_quotactl_cursorskipidtype(mp, 1014 args->u.cursorskipidtype.qc_cursor, 1015 args->u.cursorskipidtype.qc_idtype); 1016 break; 1017 case QUOTACTL_CURSORGET: 1018 error = do_sys_quotactl_cursorget(mp, 1019 args->u.cursorget.qc_cursor, 1020 args->u.cursorget.qc_keys, 1021 args->u.cursorget.qc_vals, 1022 args->u.cursorget.qc_maxnum, 1023 args->u.cursorget.qc_ret); 1024 break; 1025 case QUOTACTL_CURSORATEND: 1026 error = do_sys_quotactl_cursoratend(mp, 1027 args->u.cursoratend.qc_cursor, 1028 args->u.cursoratend.qc_ret); 1029 break; 1030 case QUOTACTL_CURSORREWIND: 1031 error = do_sys_quotactl_cursorrewind(mp, 1032 args->u.cursorrewind.qc_cursor); 1033 break; 1034 case QUOTACTL_QUOTAON: 1035 error = do_sys_quotactl_quotaon(mp, 1036 args->u.quotaon.qc_idtype, 1037 args->u.quotaon.qc_quotafile); 1038 break; 1039 case QUOTACTL_QUOTAOFF: 1040 error = do_sys_quotactl_quotaoff(mp, 1041 args->u.quotaoff.qc_idtype); 1042 break; 1043 default: 1044 error = EINVAL; 1045 break; 1046 } 1047 1048 vrele(vp); 1049 return error; 1050 } 1051 1052 /* ARGSUSED */ 1053 int 1054 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1055 register_t *retval) 1056 { 1057 /* { 1058 syscallarg(const char *) path; 1059 syscallarg(struct quotactl_args *) args; 1060 } */ 1061 struct quotactl_args args; 1062 int error; 1063 1064 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1065 if (error) { 1066 return error; 1067 } 1068 1069 return do_sys_quotactl(SCARG(uap, path), &args); 1070 } 1071 1072 int 1073 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1074 int root) 1075 { 1076 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1077 int error = 0; 1078 1079 /* 1080 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1081 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1082 * overrides MNT_NOWAIT. 1083 */ 1084 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1085 (flags != MNT_WAIT && flags != 0)) { 1086 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1087 goto done; 1088 } 1089 1090 /* Get the filesystem stats now */ 1091 memset(sp, 0, sizeof(*sp)); 1092 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1093 return error; 1094 } 1095 1096 if (cwdi->cwdi_rdir == NULL) 1097 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1098 done: 1099 if (cwdi->cwdi_rdir != NULL) { 1100 size_t len; 1101 char *bp; 1102 char c; 1103 char *path = PNBUF_GET(); 1104 1105 bp = path + MAXPATHLEN; 1106 *--bp = '\0'; 1107 rw_enter(&cwdi->cwdi_lock, RW_READER); 1108 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1109 MAXPATHLEN / 2, 0, l); 1110 rw_exit(&cwdi->cwdi_lock); 1111 if (error) { 1112 PNBUF_PUT(path); 1113 return error; 1114 } 1115 len = strlen(bp); 1116 if (len != 1) { 1117 /* 1118 * for mount points that are below our root, we can see 1119 * them, so we fix up the pathname and return them. The 1120 * rest we cannot see, so we don't allow viewing the 1121 * data. 1122 */ 1123 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1124 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1125 (void)strlcpy(sp->f_mntonname, 1126 c == '\0' ? "/" : &sp->f_mntonname[len], 1127 sizeof(sp->f_mntonname)); 1128 } else { 1129 if (root) 1130 (void)strlcpy(sp->f_mntonname, "/", 1131 sizeof(sp->f_mntonname)); 1132 else 1133 error = EPERM; 1134 } 1135 } 1136 PNBUF_PUT(path); 1137 } 1138 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1139 return error; 1140 } 1141 1142 /* 1143 * Get filesystem statistics by path. 1144 */ 1145 int 1146 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1147 { 1148 struct mount *mp; 1149 int error; 1150 struct vnode *vp; 1151 1152 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1153 if (error != 0) 1154 return error; 1155 mp = vp->v_mount; 1156 error = dostatvfs(mp, sb, l, flags, 1); 1157 vrele(vp); 1158 return error; 1159 } 1160 1161 /* ARGSUSED */ 1162 int 1163 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1164 { 1165 /* { 1166 syscallarg(const char *) path; 1167 syscallarg(struct statvfs *) buf; 1168 syscallarg(int) flags; 1169 } */ 1170 struct statvfs *sb; 1171 int error; 1172 1173 sb = STATVFSBUF_GET(); 1174 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1175 if (error == 0) 1176 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1177 STATVFSBUF_PUT(sb); 1178 return error; 1179 } 1180 1181 /* 1182 * Get filesystem statistics by fd. 1183 */ 1184 int 1185 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1186 { 1187 file_t *fp; 1188 struct mount *mp; 1189 int error; 1190 1191 /* fd_getvnode() will use the descriptor for us */ 1192 if ((error = fd_getvnode(fd, &fp)) != 0) 1193 return (error); 1194 mp = ((struct vnode *)fp->f_data)->v_mount; 1195 error = dostatvfs(mp, sb, curlwp, flags, 1); 1196 fd_putfile(fd); 1197 return error; 1198 } 1199 1200 /* ARGSUSED */ 1201 int 1202 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1203 { 1204 /* { 1205 syscallarg(int) fd; 1206 syscallarg(struct statvfs *) buf; 1207 syscallarg(int) flags; 1208 } */ 1209 struct statvfs *sb; 1210 int error; 1211 1212 sb = STATVFSBUF_GET(); 1213 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1214 if (error == 0) 1215 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1216 STATVFSBUF_PUT(sb); 1217 return error; 1218 } 1219 1220 1221 /* 1222 * Get statistics on all filesystems. 1223 */ 1224 int 1225 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1226 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1227 register_t *retval) 1228 { 1229 int root = 0; 1230 struct proc *p = l->l_proc; 1231 struct mount *mp, *nmp; 1232 struct statvfs *sb; 1233 size_t count, maxcount; 1234 int error = 0; 1235 1236 sb = STATVFSBUF_GET(); 1237 maxcount = bufsize / entry_sz; 1238 mutex_enter(&mountlist_lock); 1239 count = 0; 1240 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1241 mp = nmp) { 1242 if (vfs_busy(mp, &nmp)) { 1243 continue; 1244 } 1245 if (sfsp && count < maxcount) { 1246 error = dostatvfs(mp, sb, l, flags, 0); 1247 if (error) { 1248 vfs_unbusy(mp, false, &nmp); 1249 error = 0; 1250 continue; 1251 } 1252 error = copyfn(sb, sfsp, entry_sz); 1253 if (error) { 1254 vfs_unbusy(mp, false, NULL); 1255 goto out; 1256 } 1257 sfsp = (char *)sfsp + entry_sz; 1258 root |= strcmp(sb->f_mntonname, "/") == 0; 1259 } 1260 count++; 1261 vfs_unbusy(mp, false, &nmp); 1262 } 1263 mutex_exit(&mountlist_lock); 1264 1265 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1266 /* 1267 * fake a root entry 1268 */ 1269 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1270 sb, l, flags, 1); 1271 if (error != 0) 1272 goto out; 1273 if (sfsp) { 1274 error = copyfn(sb, sfsp, entry_sz); 1275 if (error != 0) 1276 goto out; 1277 } 1278 count++; 1279 } 1280 if (sfsp && count > maxcount) 1281 *retval = maxcount; 1282 else 1283 *retval = count; 1284 out: 1285 STATVFSBUF_PUT(sb); 1286 return error; 1287 } 1288 1289 int 1290 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1291 { 1292 /* { 1293 syscallarg(struct statvfs *) buf; 1294 syscallarg(size_t) bufsize; 1295 syscallarg(int) flags; 1296 } */ 1297 1298 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1299 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1300 } 1301 1302 /* 1303 * Change current working directory to a given file descriptor. 1304 */ 1305 /* ARGSUSED */ 1306 int 1307 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1308 { 1309 /* { 1310 syscallarg(int) fd; 1311 } */ 1312 struct proc *p = l->l_proc; 1313 struct cwdinfo *cwdi; 1314 struct vnode *vp, *tdp; 1315 struct mount *mp; 1316 file_t *fp; 1317 int error, fd; 1318 1319 /* fd_getvnode() will use the descriptor for us */ 1320 fd = SCARG(uap, fd); 1321 if ((error = fd_getvnode(fd, &fp)) != 0) 1322 return (error); 1323 vp = fp->f_data; 1324 1325 vref(vp); 1326 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1327 if (vp->v_type != VDIR) 1328 error = ENOTDIR; 1329 else 1330 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1331 if (error) { 1332 vput(vp); 1333 goto out; 1334 } 1335 while ((mp = vp->v_mountedhere) != NULL) { 1336 error = vfs_busy(mp, NULL); 1337 vput(vp); 1338 if (error != 0) 1339 goto out; 1340 error = VFS_ROOT(mp, &tdp); 1341 vfs_unbusy(mp, false, NULL); 1342 if (error) 1343 goto out; 1344 vp = tdp; 1345 } 1346 VOP_UNLOCK(vp); 1347 1348 /* 1349 * Disallow changing to a directory not under the process's 1350 * current root directory (if there is one). 1351 */ 1352 cwdi = p->p_cwdi; 1353 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1354 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1355 vrele(vp); 1356 error = EPERM; /* operation not permitted */ 1357 } else { 1358 vrele(cwdi->cwdi_cdir); 1359 cwdi->cwdi_cdir = vp; 1360 } 1361 rw_exit(&cwdi->cwdi_lock); 1362 1363 out: 1364 fd_putfile(fd); 1365 return (error); 1366 } 1367 1368 /* 1369 * Change this process's notion of the root directory to a given file 1370 * descriptor. 1371 */ 1372 int 1373 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1374 { 1375 struct proc *p = l->l_proc; 1376 struct vnode *vp; 1377 file_t *fp; 1378 int error, fd = SCARG(uap, fd); 1379 1380 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1381 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1382 return error; 1383 /* fd_getvnode() will use the descriptor for us */ 1384 if ((error = fd_getvnode(fd, &fp)) != 0) 1385 return error; 1386 vp = fp->f_data; 1387 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1388 if (vp->v_type != VDIR) 1389 error = ENOTDIR; 1390 else 1391 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1392 VOP_UNLOCK(vp); 1393 if (error) 1394 goto out; 1395 vref(vp); 1396 1397 change_root(p->p_cwdi, vp, l); 1398 1399 out: 1400 fd_putfile(fd); 1401 return (error); 1402 } 1403 1404 /* 1405 * Change current working directory (``.''). 1406 */ 1407 /* ARGSUSED */ 1408 int 1409 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1410 { 1411 /* { 1412 syscallarg(const char *) path; 1413 } */ 1414 struct proc *p = l->l_proc; 1415 struct cwdinfo *cwdi; 1416 int error; 1417 struct vnode *vp; 1418 1419 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1420 &vp, l)) != 0) 1421 return (error); 1422 cwdi = p->p_cwdi; 1423 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1424 vrele(cwdi->cwdi_cdir); 1425 cwdi->cwdi_cdir = vp; 1426 rw_exit(&cwdi->cwdi_lock); 1427 return (0); 1428 } 1429 1430 /* 1431 * Change notion of root (``/'') directory. 1432 */ 1433 /* ARGSUSED */ 1434 int 1435 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1436 { 1437 /* { 1438 syscallarg(const char *) path; 1439 } */ 1440 struct proc *p = l->l_proc; 1441 int error; 1442 struct vnode *vp; 1443 1444 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1445 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1446 return (error); 1447 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1448 &vp, l)) != 0) 1449 return (error); 1450 1451 change_root(p->p_cwdi, vp, l); 1452 1453 return (0); 1454 } 1455 1456 /* 1457 * Common routine for chroot and fchroot. 1458 * NB: callers need to properly authorize the change root operation. 1459 */ 1460 void 1461 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1462 { 1463 struct proc *p = l->l_proc; 1464 kauth_cred_t ncred; 1465 1466 ncred = kauth_cred_alloc(); 1467 1468 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1469 if (cwdi->cwdi_rdir != NULL) 1470 vrele(cwdi->cwdi_rdir); 1471 cwdi->cwdi_rdir = vp; 1472 1473 /* 1474 * Prevent escaping from chroot by putting the root under 1475 * the working directory. Silently chdir to / if we aren't 1476 * already there. 1477 */ 1478 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1479 /* 1480 * XXX would be more failsafe to change directory to a 1481 * deadfs node here instead 1482 */ 1483 vrele(cwdi->cwdi_cdir); 1484 vref(vp); 1485 cwdi->cwdi_cdir = vp; 1486 } 1487 rw_exit(&cwdi->cwdi_lock); 1488 1489 /* Get a write lock on the process credential. */ 1490 proc_crmod_enter(); 1491 1492 kauth_cred_clone(p->p_cred, ncred); 1493 kauth_proc_chroot(ncred, p->p_cwdi); 1494 1495 /* Broadcast our credentials to the process and other LWPs. */ 1496 proc_crmod_leave(ncred, p->p_cred, true); 1497 } 1498 1499 /* 1500 * Common routine for chroot and chdir. 1501 * XXX "where" should be enum uio_seg 1502 */ 1503 int 1504 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1505 { 1506 struct pathbuf *pb; 1507 struct nameidata nd; 1508 int error; 1509 1510 error = pathbuf_maybe_copyin(path, where, &pb); 1511 if (error) { 1512 return error; 1513 } 1514 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1515 if ((error = namei(&nd)) != 0) { 1516 pathbuf_destroy(pb); 1517 return error; 1518 } 1519 *vpp = nd.ni_vp; 1520 pathbuf_destroy(pb); 1521 1522 if ((*vpp)->v_type != VDIR) 1523 error = ENOTDIR; 1524 else 1525 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1526 1527 if (error) 1528 vput(*vpp); 1529 else 1530 VOP_UNLOCK(*vpp); 1531 return (error); 1532 } 1533 1534 /* 1535 * Internals of sys_open - path has already been converted into a pathbuf 1536 * (so we can easily reuse this function from other parts of the kernel, 1537 * like posix_spawn post-processing). 1538 */ 1539 static int 1540 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1541 int open_mode, int *fd) 1542 { 1543 struct proc *p = l->l_proc; 1544 struct cwdinfo *cwdi = p->p_cwdi; 1545 file_t *fp; 1546 struct vnode *vp; 1547 int flags, cmode; 1548 int indx, error; 1549 struct nameidata nd; 1550 1551 if (open_flags & O_SEARCH) { 1552 open_flags &= ~(int)O_SEARCH; 1553 } 1554 1555 flags = FFLAGS(open_flags); 1556 if ((flags & (FREAD | FWRITE)) == 0) 1557 return EINVAL; 1558 1559 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1560 return error; 1561 } 1562 1563 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1564 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1565 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1566 if (dvp != NULL) 1567 NDAT(&nd, dvp); 1568 1569 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1570 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1571 fd_abort(p, fp, indx); 1572 if ((error == EDUPFD || error == EMOVEFD) && 1573 l->l_dupfd >= 0 && /* XXX from fdopen */ 1574 (error = 1575 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1576 *fd = indx; 1577 return 0; 1578 } 1579 if (error == ERESTART) 1580 error = EINTR; 1581 return error; 1582 } 1583 1584 l->l_dupfd = 0; 1585 vp = nd.ni_vp; 1586 1587 if ((error = open_setfp(l, fp, vp, indx, flags))) 1588 return error; 1589 1590 VOP_UNLOCK(vp); 1591 *fd = indx; 1592 fd_affix(p, fp, indx); 1593 return 0; 1594 } 1595 1596 int 1597 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1598 { 1599 struct pathbuf *pb; 1600 int error, oflags; 1601 1602 oflags = FFLAGS(open_flags); 1603 if ((oflags & (FREAD | FWRITE)) == 0) 1604 return EINVAL; 1605 1606 pb = pathbuf_create(path); 1607 if (pb == NULL) 1608 return ENOMEM; 1609 1610 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1611 pathbuf_destroy(pb); 1612 1613 return error; 1614 } 1615 1616 /* 1617 * Check permissions, allocate an open file structure, 1618 * and call the device open routine if any. 1619 */ 1620 static int 1621 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1622 int mode, int *fd) 1623 { 1624 file_t *dfp = NULL; 1625 struct vnode *dvp = NULL; 1626 struct pathbuf *pb; 1627 int error; 1628 1629 error = pathbuf_copyin(path, &pb); 1630 if (error) 1631 return error; 1632 1633 if (fdat != AT_FDCWD) { 1634 /* fd_getvnode() will use the descriptor for us */ 1635 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1636 goto out; 1637 1638 dvp = dfp->f_data; 1639 } 1640 1641 error = do_open(l, dvp, pb, flags, mode, fd); 1642 1643 if (dfp != NULL) 1644 fd_putfile(fdat); 1645 out: 1646 pathbuf_destroy(pb); 1647 return error; 1648 } 1649 1650 int 1651 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1652 { 1653 /* { 1654 syscallarg(const char *) path; 1655 syscallarg(int) flags; 1656 syscallarg(int) mode; 1657 } */ 1658 int error; 1659 int fd; 1660 1661 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1662 SCARG(uap, flags), SCARG(uap, mode), &fd); 1663 1664 if (error == 0) 1665 *retval = fd; 1666 1667 return error; 1668 } 1669 1670 int 1671 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1672 { 1673 /* { 1674 syscallarg(int) fd; 1675 syscallarg(const char *) path; 1676 syscallarg(int) oflags; 1677 syscallarg(int) mode; 1678 } */ 1679 int error; 1680 int fd; 1681 1682 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1683 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1684 1685 if (error == 0) 1686 *retval = fd; 1687 1688 return error; 1689 } 1690 1691 static void 1692 vfs__fhfree(fhandle_t *fhp) 1693 { 1694 size_t fhsize; 1695 1696 if (fhp == NULL) { 1697 return; 1698 } 1699 fhsize = FHANDLE_SIZE(fhp); 1700 kmem_free(fhp, fhsize); 1701 } 1702 1703 /* 1704 * vfs_composefh: compose a filehandle. 1705 */ 1706 1707 int 1708 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1709 { 1710 struct mount *mp; 1711 struct fid *fidp; 1712 int error; 1713 size_t needfhsize; 1714 size_t fidsize; 1715 1716 mp = vp->v_mount; 1717 fidp = NULL; 1718 if (*fh_size < FHANDLE_SIZE_MIN) { 1719 fidsize = 0; 1720 } else { 1721 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1722 if (fhp != NULL) { 1723 memset(fhp, 0, *fh_size); 1724 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1725 fidp = &fhp->fh_fid; 1726 } 1727 } 1728 error = VFS_VPTOFH(vp, fidp, &fidsize); 1729 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1730 if (error == 0 && *fh_size < needfhsize) { 1731 error = E2BIG; 1732 } 1733 *fh_size = needfhsize; 1734 return error; 1735 } 1736 1737 int 1738 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1739 { 1740 struct mount *mp; 1741 fhandle_t *fhp; 1742 size_t fhsize; 1743 size_t fidsize; 1744 int error; 1745 1746 *fhpp = NULL; 1747 mp = vp->v_mount; 1748 fidsize = 0; 1749 error = VFS_VPTOFH(vp, NULL, &fidsize); 1750 KASSERT(error != 0); 1751 if (error != E2BIG) { 1752 goto out; 1753 } 1754 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1755 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1756 if (fhp == NULL) { 1757 error = ENOMEM; 1758 goto out; 1759 } 1760 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1761 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1762 if (error == 0) { 1763 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1764 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1765 *fhpp = fhp; 1766 } else { 1767 kmem_free(fhp, fhsize); 1768 } 1769 out: 1770 return error; 1771 } 1772 1773 void 1774 vfs_composefh_free(fhandle_t *fhp) 1775 { 1776 1777 vfs__fhfree(fhp); 1778 } 1779 1780 /* 1781 * vfs_fhtovp: lookup a vnode by a filehandle. 1782 */ 1783 1784 int 1785 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1786 { 1787 struct mount *mp; 1788 int error; 1789 1790 *vpp = NULL; 1791 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1792 if (mp == NULL) { 1793 error = ESTALE; 1794 goto out; 1795 } 1796 if (mp->mnt_op->vfs_fhtovp == NULL) { 1797 error = EOPNOTSUPP; 1798 goto out; 1799 } 1800 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1801 out: 1802 return error; 1803 } 1804 1805 /* 1806 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1807 * the needed size. 1808 */ 1809 1810 int 1811 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1812 { 1813 fhandle_t *fhp; 1814 int error; 1815 1816 *fhpp = NULL; 1817 if (fhsize > FHANDLE_SIZE_MAX) { 1818 return EINVAL; 1819 } 1820 if (fhsize < FHANDLE_SIZE_MIN) { 1821 return EINVAL; 1822 } 1823 again: 1824 fhp = kmem_alloc(fhsize, KM_SLEEP); 1825 if (fhp == NULL) { 1826 return ENOMEM; 1827 } 1828 error = copyin(ufhp, fhp, fhsize); 1829 if (error == 0) { 1830 /* XXX this check shouldn't be here */ 1831 if (FHANDLE_SIZE(fhp) == fhsize) { 1832 *fhpp = fhp; 1833 return 0; 1834 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1835 /* 1836 * a kludge for nfsv2 padded handles. 1837 */ 1838 size_t sz; 1839 1840 sz = FHANDLE_SIZE(fhp); 1841 kmem_free(fhp, fhsize); 1842 fhsize = sz; 1843 goto again; 1844 } else { 1845 /* 1846 * userland told us wrong size. 1847 */ 1848 error = EINVAL; 1849 } 1850 } 1851 kmem_free(fhp, fhsize); 1852 return error; 1853 } 1854 1855 void 1856 vfs_copyinfh_free(fhandle_t *fhp) 1857 { 1858 1859 vfs__fhfree(fhp); 1860 } 1861 1862 /* 1863 * Get file handle system call 1864 */ 1865 int 1866 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1867 { 1868 /* { 1869 syscallarg(char *) fname; 1870 syscallarg(fhandle_t *) fhp; 1871 syscallarg(size_t *) fh_size; 1872 } */ 1873 struct vnode *vp; 1874 fhandle_t *fh; 1875 int error; 1876 struct pathbuf *pb; 1877 struct nameidata nd; 1878 size_t sz; 1879 size_t usz; 1880 1881 /* 1882 * Must be super user 1883 */ 1884 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1885 0, NULL, NULL, NULL); 1886 if (error) 1887 return (error); 1888 1889 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1890 if (error) { 1891 return error; 1892 } 1893 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1894 error = namei(&nd); 1895 if (error) { 1896 pathbuf_destroy(pb); 1897 return error; 1898 } 1899 vp = nd.ni_vp; 1900 pathbuf_destroy(pb); 1901 1902 error = vfs_composefh_alloc(vp, &fh); 1903 vput(vp); 1904 if (error != 0) { 1905 goto out; 1906 } 1907 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1908 if (error != 0) { 1909 goto out; 1910 } 1911 sz = FHANDLE_SIZE(fh); 1912 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1913 if (error != 0) { 1914 goto out; 1915 } 1916 if (usz >= sz) { 1917 error = copyout(fh, SCARG(uap, fhp), sz); 1918 } else { 1919 error = E2BIG; 1920 } 1921 out: 1922 vfs_composefh_free(fh); 1923 return (error); 1924 } 1925 1926 /* 1927 * Open a file given a file handle. 1928 * 1929 * Check permissions, allocate an open file structure, 1930 * and call the device open routine if any. 1931 */ 1932 1933 int 1934 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1935 register_t *retval) 1936 { 1937 file_t *fp; 1938 struct vnode *vp = NULL; 1939 kauth_cred_t cred = l->l_cred; 1940 file_t *nfp; 1941 int indx, error = 0; 1942 struct vattr va; 1943 fhandle_t *fh; 1944 int flags; 1945 proc_t *p; 1946 1947 p = curproc; 1948 1949 /* 1950 * Must be super user 1951 */ 1952 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1953 0, NULL, NULL, NULL))) 1954 return (error); 1955 1956 if (oflags & O_SEARCH) { 1957 oflags &= ~(int)O_SEARCH; 1958 } 1959 1960 flags = FFLAGS(oflags); 1961 if ((flags & (FREAD | FWRITE)) == 0) 1962 return (EINVAL); 1963 if ((flags & O_CREAT)) 1964 return (EINVAL); 1965 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1966 return (error); 1967 fp = nfp; 1968 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1969 if (error != 0) { 1970 goto bad; 1971 } 1972 error = vfs_fhtovp(fh, &vp); 1973 if (error != 0) { 1974 goto bad; 1975 } 1976 1977 /* Now do an effective vn_open */ 1978 1979 if (vp->v_type == VSOCK) { 1980 error = EOPNOTSUPP; 1981 goto bad; 1982 } 1983 error = vn_openchk(vp, cred, flags); 1984 if (error != 0) 1985 goto bad; 1986 if (flags & O_TRUNC) { 1987 VOP_UNLOCK(vp); /* XXX */ 1988 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1989 vattr_null(&va); 1990 va.va_size = 0; 1991 error = VOP_SETATTR(vp, &va, cred); 1992 if (error) 1993 goto bad; 1994 } 1995 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1996 goto bad; 1997 if (flags & FWRITE) { 1998 mutex_enter(vp->v_interlock); 1999 vp->v_writecount++; 2000 mutex_exit(vp->v_interlock); 2001 } 2002 2003 /* done with modified vn_open, now finish what sys_open does. */ 2004 if ((error = open_setfp(l, fp, vp, indx, flags))) 2005 return error; 2006 2007 VOP_UNLOCK(vp); 2008 *retval = indx; 2009 fd_affix(p, fp, indx); 2010 vfs_copyinfh_free(fh); 2011 return (0); 2012 2013 bad: 2014 fd_abort(p, fp, indx); 2015 if (vp != NULL) 2016 vput(vp); 2017 vfs_copyinfh_free(fh); 2018 return (error); 2019 } 2020 2021 int 2022 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2023 { 2024 /* { 2025 syscallarg(const void *) fhp; 2026 syscallarg(size_t) fh_size; 2027 syscallarg(int) flags; 2028 } */ 2029 2030 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2031 SCARG(uap, flags), retval); 2032 } 2033 2034 int 2035 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2036 { 2037 int error; 2038 fhandle_t *fh; 2039 struct vnode *vp; 2040 2041 /* 2042 * Must be super user 2043 */ 2044 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2045 0, NULL, NULL, NULL))) 2046 return (error); 2047 2048 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2049 if (error != 0) 2050 return error; 2051 2052 error = vfs_fhtovp(fh, &vp); 2053 vfs_copyinfh_free(fh); 2054 if (error != 0) 2055 return error; 2056 2057 error = vn_stat(vp, sb); 2058 vput(vp); 2059 return error; 2060 } 2061 2062 2063 /* ARGSUSED */ 2064 int 2065 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2066 { 2067 /* { 2068 syscallarg(const void *) fhp; 2069 syscallarg(size_t) fh_size; 2070 syscallarg(struct stat *) sb; 2071 } */ 2072 struct stat sb; 2073 int error; 2074 2075 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2076 if (error) 2077 return error; 2078 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2079 } 2080 2081 int 2082 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2083 int flags) 2084 { 2085 fhandle_t *fh; 2086 struct mount *mp; 2087 struct vnode *vp; 2088 int error; 2089 2090 /* 2091 * Must be super user 2092 */ 2093 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2094 0, NULL, NULL, NULL))) 2095 return error; 2096 2097 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2098 if (error != 0) 2099 return error; 2100 2101 error = vfs_fhtovp(fh, &vp); 2102 vfs_copyinfh_free(fh); 2103 if (error != 0) 2104 return error; 2105 2106 mp = vp->v_mount; 2107 error = dostatvfs(mp, sb, l, flags, 1); 2108 vput(vp); 2109 return error; 2110 } 2111 2112 /* ARGSUSED */ 2113 int 2114 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2115 { 2116 /* { 2117 syscallarg(const void *) fhp; 2118 syscallarg(size_t) fh_size; 2119 syscallarg(struct statvfs *) buf; 2120 syscallarg(int) flags; 2121 } */ 2122 struct statvfs *sb = STATVFSBUF_GET(); 2123 int error; 2124 2125 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2126 SCARG(uap, flags)); 2127 if (error == 0) 2128 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2129 STATVFSBUF_PUT(sb); 2130 return error; 2131 } 2132 2133 /* 2134 * Create a special file. 2135 */ 2136 /* ARGSUSED */ 2137 int 2138 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2139 register_t *retval) 2140 { 2141 /* { 2142 syscallarg(const char *) path; 2143 syscallarg(mode_t) mode; 2144 syscallarg(dev_t) dev; 2145 } */ 2146 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2147 SCARG(uap, dev), retval, UIO_USERSPACE); 2148 } 2149 2150 int 2151 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2152 register_t *retval) 2153 { 2154 /* { 2155 syscallarg(int) fd; 2156 syscallarg(const char *) path; 2157 syscallarg(mode_t) mode; 2158 syscallarg(int) pad; 2159 syscallarg(dev_t) dev; 2160 } */ 2161 2162 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2163 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2164 } 2165 2166 int 2167 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2168 register_t *retval, enum uio_seg seg) 2169 { 2170 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2171 } 2172 2173 int 2174 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2175 dev_t dev, register_t *retval, enum uio_seg seg) 2176 { 2177 struct proc *p = l->l_proc; 2178 struct vnode *vp; 2179 struct vattr vattr; 2180 int error, optype; 2181 struct pathbuf *pb; 2182 struct nameidata nd; 2183 const char *pathstring; 2184 2185 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2186 0, NULL, NULL, NULL)) != 0) 2187 return (error); 2188 2189 optype = VOP_MKNOD_DESCOFFSET; 2190 2191 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2192 if (error) { 2193 return error; 2194 } 2195 pathstring = pathbuf_stringcopy_get(pb); 2196 if (pathstring == NULL) { 2197 pathbuf_destroy(pb); 2198 return ENOMEM; 2199 } 2200 2201 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2202 2203 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2204 goto out; 2205 vp = nd.ni_vp; 2206 2207 if (vp != NULL) 2208 error = EEXIST; 2209 else { 2210 vattr_null(&vattr); 2211 /* We will read cwdi->cwdi_cmask unlocked. */ 2212 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2213 vattr.va_rdev = dev; 2214 2215 switch (mode & S_IFMT) { 2216 case S_IFMT: /* used by badsect to flag bad sectors */ 2217 vattr.va_type = VBAD; 2218 break; 2219 case S_IFCHR: 2220 vattr.va_type = VCHR; 2221 break; 2222 case S_IFBLK: 2223 vattr.va_type = VBLK; 2224 break; 2225 case S_IFWHT: 2226 optype = VOP_WHITEOUT_DESCOFFSET; 2227 break; 2228 case S_IFREG: 2229 #if NVERIEXEC > 0 2230 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2231 O_CREAT); 2232 #endif /* NVERIEXEC > 0 */ 2233 vattr.va_type = VREG; 2234 vattr.va_rdev = VNOVAL; 2235 optype = VOP_CREATE_DESCOFFSET; 2236 break; 2237 default: 2238 error = EINVAL; 2239 break; 2240 } 2241 } 2242 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2243 && vattr.va_rdev == VNOVAL) 2244 error = EINVAL; 2245 if (!error) { 2246 switch (optype) { 2247 case VOP_WHITEOUT_DESCOFFSET: 2248 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2249 if (error) 2250 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2251 vput(nd.ni_dvp); 2252 break; 2253 2254 case VOP_MKNOD_DESCOFFSET: 2255 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2256 &nd.ni_cnd, &vattr); 2257 if (error == 0) 2258 vput(nd.ni_vp); 2259 break; 2260 2261 case VOP_CREATE_DESCOFFSET: 2262 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2263 &nd.ni_cnd, &vattr); 2264 if (error == 0) 2265 vput(nd.ni_vp); 2266 break; 2267 } 2268 } else { 2269 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2270 if (nd.ni_dvp == vp) 2271 vrele(nd.ni_dvp); 2272 else 2273 vput(nd.ni_dvp); 2274 if (vp) 2275 vrele(vp); 2276 } 2277 out: 2278 pathbuf_stringcopy_put(pb, pathstring); 2279 pathbuf_destroy(pb); 2280 return (error); 2281 } 2282 2283 /* 2284 * Create a named pipe. 2285 */ 2286 /* ARGSUSED */ 2287 int 2288 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2289 { 2290 /* { 2291 syscallarg(const char *) path; 2292 syscallarg(int) mode; 2293 } */ 2294 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2295 } 2296 2297 int 2298 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2299 register_t *retval) 2300 { 2301 /* { 2302 syscallarg(int) fd; 2303 syscallarg(const char *) path; 2304 syscallarg(int) mode; 2305 } */ 2306 2307 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2308 SCARG(uap, mode)); 2309 } 2310 2311 static int 2312 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2313 { 2314 struct proc *p = l->l_proc; 2315 struct vattr vattr; 2316 int error; 2317 struct pathbuf *pb; 2318 struct nameidata nd; 2319 2320 error = pathbuf_copyin(path, &pb); 2321 if (error) { 2322 return error; 2323 } 2324 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2325 2326 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2327 pathbuf_destroy(pb); 2328 return error; 2329 } 2330 if (nd.ni_vp != NULL) { 2331 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2332 if (nd.ni_dvp == nd.ni_vp) 2333 vrele(nd.ni_dvp); 2334 else 2335 vput(nd.ni_dvp); 2336 vrele(nd.ni_vp); 2337 pathbuf_destroy(pb); 2338 return (EEXIST); 2339 } 2340 vattr_null(&vattr); 2341 vattr.va_type = VFIFO; 2342 /* We will read cwdi->cwdi_cmask unlocked. */ 2343 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2344 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2345 if (error == 0) 2346 vput(nd.ni_vp); 2347 pathbuf_destroy(pb); 2348 return (error); 2349 } 2350 2351 /* 2352 * Make a hard file link. 2353 */ 2354 /* ARGSUSED */ 2355 int 2356 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2357 const char *link, int follow, register_t *retval) 2358 { 2359 struct vnode *vp; 2360 struct pathbuf *linkpb; 2361 struct nameidata nd; 2362 namei_simple_flags_t ns_flags; 2363 int error; 2364 2365 if (follow & AT_SYMLINK_FOLLOW) 2366 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2367 else 2368 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2369 2370 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2371 if (error != 0) 2372 return (error); 2373 error = pathbuf_copyin(link, &linkpb); 2374 if (error) { 2375 goto out1; 2376 } 2377 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2378 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2379 goto out2; 2380 if (nd.ni_vp) { 2381 error = EEXIST; 2382 goto abortop; 2383 } 2384 /* Prevent hard links on directories. */ 2385 if (vp->v_type == VDIR) { 2386 error = EPERM; 2387 goto abortop; 2388 } 2389 /* Prevent cross-mount operation. */ 2390 if (nd.ni_dvp->v_mount != vp->v_mount) { 2391 error = EXDEV; 2392 goto abortop; 2393 } 2394 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2395 out2: 2396 pathbuf_destroy(linkpb); 2397 out1: 2398 vrele(vp); 2399 return (error); 2400 abortop: 2401 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2402 if (nd.ni_dvp == nd.ni_vp) 2403 vrele(nd.ni_dvp); 2404 else 2405 vput(nd.ni_dvp); 2406 if (nd.ni_vp != NULL) 2407 vrele(nd.ni_vp); 2408 goto out2; 2409 } 2410 2411 int 2412 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2413 { 2414 /* { 2415 syscallarg(const char *) path; 2416 syscallarg(const char *) link; 2417 } */ 2418 const char *path = SCARG(uap, path); 2419 const char *link = SCARG(uap, link); 2420 2421 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2422 AT_SYMLINK_FOLLOW, retval); 2423 } 2424 2425 int 2426 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2427 register_t *retval) 2428 { 2429 /* { 2430 syscallarg(int) fd1; 2431 syscallarg(const char *) name1; 2432 syscallarg(int) fd2; 2433 syscallarg(const char *) name2; 2434 syscallarg(int) flags; 2435 } */ 2436 int fd1 = SCARG(uap, fd1); 2437 const char *name1 = SCARG(uap, name1); 2438 int fd2 = SCARG(uap, fd2); 2439 const char *name2 = SCARG(uap, name2); 2440 int follow; 2441 2442 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2443 2444 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2445 } 2446 2447 2448 int 2449 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2450 { 2451 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2452 } 2453 2454 static int 2455 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2456 const char *link, enum uio_seg seg) 2457 { 2458 struct proc *p = curproc; 2459 struct vattr vattr; 2460 char *path; 2461 int error; 2462 struct pathbuf *linkpb; 2463 struct nameidata nd; 2464 2465 KASSERT(l != NULL || fdat == AT_FDCWD); 2466 2467 path = PNBUF_GET(); 2468 if (seg == UIO_USERSPACE) { 2469 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2470 goto out1; 2471 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2472 goto out1; 2473 } else { 2474 KASSERT(strlen(patharg) < MAXPATHLEN); 2475 strcpy(path, patharg); 2476 linkpb = pathbuf_create(link); 2477 if (linkpb == NULL) { 2478 error = ENOMEM; 2479 goto out1; 2480 } 2481 } 2482 ktrkuser("symlink-target", path, strlen(path)); 2483 2484 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2485 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2486 goto out2; 2487 if (nd.ni_vp) { 2488 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2489 if (nd.ni_dvp == nd.ni_vp) 2490 vrele(nd.ni_dvp); 2491 else 2492 vput(nd.ni_dvp); 2493 vrele(nd.ni_vp); 2494 error = EEXIST; 2495 goto out2; 2496 } 2497 vattr_null(&vattr); 2498 vattr.va_type = VLNK; 2499 /* We will read cwdi->cwdi_cmask unlocked. */ 2500 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2501 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2502 if (error == 0) 2503 vput(nd.ni_vp); 2504 out2: 2505 pathbuf_destroy(linkpb); 2506 out1: 2507 PNBUF_PUT(path); 2508 return (error); 2509 } 2510 2511 /* 2512 * Make a symbolic link. 2513 */ 2514 /* ARGSUSED */ 2515 int 2516 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2517 { 2518 /* { 2519 syscallarg(const char *) path; 2520 syscallarg(const char *) link; 2521 } */ 2522 2523 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2524 UIO_USERSPACE); 2525 } 2526 2527 int 2528 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2529 register_t *retval) 2530 { 2531 /* { 2532 syscallarg(const char *) path1; 2533 syscallarg(int) fd; 2534 syscallarg(const char *) path2; 2535 } */ 2536 2537 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2538 SCARG(uap, path2), UIO_USERSPACE); 2539 } 2540 2541 /* 2542 * Delete a whiteout from the filesystem. 2543 */ 2544 /* ARGSUSED */ 2545 int 2546 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2547 { 2548 /* { 2549 syscallarg(const char *) path; 2550 } */ 2551 int error; 2552 struct pathbuf *pb; 2553 struct nameidata nd; 2554 2555 error = pathbuf_copyin(SCARG(uap, path), &pb); 2556 if (error) { 2557 return error; 2558 } 2559 2560 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2561 error = namei(&nd); 2562 if (error) { 2563 pathbuf_destroy(pb); 2564 return (error); 2565 } 2566 2567 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2568 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2569 if (nd.ni_dvp == nd.ni_vp) 2570 vrele(nd.ni_dvp); 2571 else 2572 vput(nd.ni_dvp); 2573 if (nd.ni_vp) 2574 vrele(nd.ni_vp); 2575 pathbuf_destroy(pb); 2576 return (EEXIST); 2577 } 2578 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2579 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2580 vput(nd.ni_dvp); 2581 pathbuf_destroy(pb); 2582 return (error); 2583 } 2584 2585 /* 2586 * Delete a name from the filesystem. 2587 */ 2588 /* ARGSUSED */ 2589 int 2590 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2591 { 2592 /* { 2593 syscallarg(const char *) path; 2594 } */ 2595 2596 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2597 } 2598 2599 int 2600 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2601 register_t *retval) 2602 { 2603 /* { 2604 syscallarg(int) fd; 2605 syscallarg(const char *) path; 2606 syscallarg(int) flag; 2607 } */ 2608 2609 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2610 SCARG(uap, flag), UIO_USERSPACE); 2611 } 2612 2613 int 2614 do_sys_unlink(const char *arg, enum uio_seg seg) 2615 { 2616 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2617 } 2618 2619 static int 2620 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2621 enum uio_seg seg) 2622 { 2623 struct vnode *vp; 2624 int error; 2625 struct pathbuf *pb; 2626 struct nameidata nd; 2627 const char *pathstring; 2628 2629 KASSERT(l != NULL || fdat == AT_FDCWD); 2630 2631 error = pathbuf_maybe_copyin(arg, seg, &pb); 2632 if (error) { 2633 return error; 2634 } 2635 pathstring = pathbuf_stringcopy_get(pb); 2636 if (pathstring == NULL) { 2637 pathbuf_destroy(pb); 2638 return ENOMEM; 2639 } 2640 2641 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2642 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2643 goto out; 2644 vp = nd.ni_vp; 2645 2646 /* 2647 * The root of a mounted filesystem cannot be deleted. 2648 */ 2649 if ((vp->v_vflag & VV_ROOT) != 0) { 2650 error = EBUSY; 2651 goto abort; 2652 } 2653 2654 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2655 error = EBUSY; 2656 goto abort; 2657 } 2658 2659 /* 2660 * No rmdir "." please. 2661 */ 2662 if (nd.ni_dvp == vp) { 2663 error = EINVAL; 2664 goto abort; 2665 } 2666 2667 /* 2668 * AT_REMOVEDIR is required to remove a directory 2669 */ 2670 if (vp->v_type == VDIR) { 2671 if (!(flags & AT_REMOVEDIR)) { 2672 error = EPERM; 2673 goto abort; 2674 } else { 2675 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2676 goto out; 2677 } 2678 } 2679 2680 /* 2681 * Starting here we only deal with non directories. 2682 */ 2683 if (flags & AT_REMOVEDIR) { 2684 error = ENOTDIR; 2685 goto abort; 2686 } 2687 2688 2689 #if NVERIEXEC > 0 2690 /* Handle remove requests for veriexec entries. */ 2691 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2692 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2693 if (nd.ni_dvp == vp) 2694 vrele(nd.ni_dvp); 2695 else 2696 vput(nd.ni_dvp); 2697 vput(vp); 2698 goto out; 2699 } 2700 #endif /* NVERIEXEC > 0 */ 2701 2702 #ifdef FILEASSOC 2703 (void)fileassoc_file_delete(vp); 2704 #endif /* FILEASSOC */ 2705 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2706 goto out; 2707 2708 abort: 2709 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2710 if (nd.ni_dvp == vp) 2711 vrele(nd.ni_dvp); 2712 else 2713 vput(nd.ni_dvp); 2714 vput(vp); 2715 2716 out: 2717 pathbuf_stringcopy_put(pb, pathstring); 2718 pathbuf_destroy(pb); 2719 return (error); 2720 } 2721 2722 /* 2723 * Reposition read/write file offset. 2724 */ 2725 int 2726 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2727 { 2728 /* { 2729 syscallarg(int) fd; 2730 syscallarg(int) pad; 2731 syscallarg(off_t) offset; 2732 syscallarg(int) whence; 2733 } */ 2734 kauth_cred_t cred = l->l_cred; 2735 file_t *fp; 2736 struct vnode *vp; 2737 struct vattr vattr; 2738 off_t newoff; 2739 int error, fd; 2740 2741 fd = SCARG(uap, fd); 2742 2743 if ((fp = fd_getfile(fd)) == NULL) 2744 return (EBADF); 2745 2746 vp = fp->f_data; 2747 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2748 error = ESPIPE; 2749 goto out; 2750 } 2751 2752 switch (SCARG(uap, whence)) { 2753 case SEEK_CUR: 2754 newoff = fp->f_offset + SCARG(uap, offset); 2755 break; 2756 case SEEK_END: 2757 vn_lock(vp, LK_SHARED | LK_RETRY); 2758 error = VOP_GETATTR(vp, &vattr, cred); 2759 VOP_UNLOCK(vp); 2760 if (error) { 2761 goto out; 2762 } 2763 newoff = SCARG(uap, offset) + vattr.va_size; 2764 break; 2765 case SEEK_SET: 2766 newoff = SCARG(uap, offset); 2767 break; 2768 default: 2769 error = EINVAL; 2770 goto out; 2771 } 2772 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2773 *(off_t *)retval = fp->f_offset = newoff; 2774 } 2775 out: 2776 fd_putfile(fd); 2777 return (error); 2778 } 2779 2780 /* 2781 * Positional read system call. 2782 */ 2783 int 2784 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2785 { 2786 /* { 2787 syscallarg(int) fd; 2788 syscallarg(void *) buf; 2789 syscallarg(size_t) nbyte; 2790 syscallarg(off_t) offset; 2791 } */ 2792 file_t *fp; 2793 struct vnode *vp; 2794 off_t offset; 2795 int error, fd = SCARG(uap, fd); 2796 2797 if ((fp = fd_getfile(fd)) == NULL) 2798 return (EBADF); 2799 2800 if ((fp->f_flag & FREAD) == 0) { 2801 fd_putfile(fd); 2802 return (EBADF); 2803 } 2804 2805 vp = fp->f_data; 2806 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2807 error = ESPIPE; 2808 goto out; 2809 } 2810 2811 offset = SCARG(uap, offset); 2812 2813 /* 2814 * XXX This works because no file systems actually 2815 * XXX take any action on the seek operation. 2816 */ 2817 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2818 goto out; 2819 2820 /* dofileread() will unuse the descriptor for us */ 2821 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2822 &offset, 0, retval)); 2823 2824 out: 2825 fd_putfile(fd); 2826 return (error); 2827 } 2828 2829 /* 2830 * Positional scatter read system call. 2831 */ 2832 int 2833 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2834 { 2835 /* { 2836 syscallarg(int) fd; 2837 syscallarg(const struct iovec *) iovp; 2838 syscallarg(int) iovcnt; 2839 syscallarg(off_t) offset; 2840 } */ 2841 off_t offset = SCARG(uap, offset); 2842 2843 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2844 SCARG(uap, iovcnt), &offset, 0, retval); 2845 } 2846 2847 /* 2848 * Positional write system call. 2849 */ 2850 int 2851 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2852 { 2853 /* { 2854 syscallarg(int) fd; 2855 syscallarg(const void *) buf; 2856 syscallarg(size_t) nbyte; 2857 syscallarg(off_t) offset; 2858 } */ 2859 file_t *fp; 2860 struct vnode *vp; 2861 off_t offset; 2862 int error, fd = SCARG(uap, fd); 2863 2864 if ((fp = fd_getfile(fd)) == NULL) 2865 return (EBADF); 2866 2867 if ((fp->f_flag & FWRITE) == 0) { 2868 fd_putfile(fd); 2869 return (EBADF); 2870 } 2871 2872 vp = fp->f_data; 2873 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2874 error = ESPIPE; 2875 goto out; 2876 } 2877 2878 offset = SCARG(uap, offset); 2879 2880 /* 2881 * XXX This works because no file systems actually 2882 * XXX take any action on the seek operation. 2883 */ 2884 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2885 goto out; 2886 2887 /* dofilewrite() will unuse the descriptor for us */ 2888 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2889 &offset, 0, retval)); 2890 2891 out: 2892 fd_putfile(fd); 2893 return (error); 2894 } 2895 2896 /* 2897 * Positional gather write system call. 2898 */ 2899 int 2900 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2901 { 2902 /* { 2903 syscallarg(int) fd; 2904 syscallarg(const struct iovec *) iovp; 2905 syscallarg(int) iovcnt; 2906 syscallarg(off_t) offset; 2907 } */ 2908 off_t offset = SCARG(uap, offset); 2909 2910 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2911 SCARG(uap, iovcnt), &offset, 0, retval); 2912 } 2913 2914 /* 2915 * Check access permissions. 2916 */ 2917 int 2918 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2919 { 2920 /* { 2921 syscallarg(const char *) path; 2922 syscallarg(int) flags; 2923 } */ 2924 2925 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2926 SCARG(uap, flags), 0); 2927 } 2928 2929 int 2930 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2931 int mode, int flags) 2932 { 2933 kauth_cred_t cred; 2934 struct vnode *vp; 2935 int error, nd_flag, vmode; 2936 struct pathbuf *pb; 2937 struct nameidata nd; 2938 2939 CTASSERT(F_OK == 0); 2940 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2941 /* nonsense mode */ 2942 return EINVAL; 2943 } 2944 2945 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2946 if (flags & AT_SYMLINK_NOFOLLOW) 2947 nd_flag &= ~FOLLOW; 2948 2949 error = pathbuf_copyin(path, &pb); 2950 if (error) 2951 return error; 2952 2953 NDINIT(&nd, LOOKUP, nd_flag, pb); 2954 2955 /* Override default credentials */ 2956 cred = kauth_cred_dup(l->l_cred); 2957 if (!(flags & AT_EACCESS)) { 2958 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2959 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2960 } 2961 nd.ni_cnd.cn_cred = cred; 2962 2963 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2964 pathbuf_destroy(pb); 2965 goto out; 2966 } 2967 vp = nd.ni_vp; 2968 pathbuf_destroy(pb); 2969 2970 /* Flags == 0 means only check for existence. */ 2971 if (mode) { 2972 vmode = 0; 2973 if (mode & R_OK) 2974 vmode |= VREAD; 2975 if (mode & W_OK) 2976 vmode |= VWRITE; 2977 if (mode & X_OK) 2978 vmode |= VEXEC; 2979 2980 error = VOP_ACCESS(vp, vmode, cred); 2981 if (!error && (vmode & VWRITE)) 2982 error = vn_writechk(vp); 2983 } 2984 vput(vp); 2985 out: 2986 kauth_cred_free(cred); 2987 return (error); 2988 } 2989 2990 int 2991 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 2992 register_t *retval) 2993 { 2994 /* { 2995 syscallarg(int) fd; 2996 syscallarg(const char *) path; 2997 syscallarg(int) amode; 2998 syscallarg(int) flag; 2999 } */ 3000 3001 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3002 SCARG(uap, amode), SCARG(uap, flag)); 3003 } 3004 3005 /* 3006 * Common code for all sys_stat functions, including compat versions. 3007 */ 3008 int 3009 do_sys_stat(const char *userpath, unsigned int nd_flag, 3010 struct stat *sb) 3011 { 3012 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3013 } 3014 3015 int 3016 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3017 unsigned int nd_flag, struct stat *sb) 3018 { 3019 int error; 3020 struct pathbuf *pb; 3021 struct nameidata nd; 3022 3023 KASSERT(l != NULL || fdat == AT_FDCWD); 3024 3025 error = pathbuf_copyin(userpath, &pb); 3026 if (error) { 3027 return error; 3028 } 3029 3030 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3031 3032 error = fd_nameiat(l, fdat, &nd); 3033 if (error != 0) { 3034 pathbuf_destroy(pb); 3035 return error; 3036 } 3037 error = vn_stat(nd.ni_vp, sb); 3038 vput(nd.ni_vp); 3039 pathbuf_destroy(pb); 3040 return error; 3041 } 3042 3043 /* 3044 * Get file status; this version follows links. 3045 */ 3046 /* ARGSUSED */ 3047 int 3048 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3049 { 3050 /* { 3051 syscallarg(const char *) path; 3052 syscallarg(struct stat *) ub; 3053 } */ 3054 struct stat sb; 3055 int error; 3056 3057 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3058 if (error) 3059 return error; 3060 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3061 } 3062 3063 /* 3064 * Get file status; this version does not follow links. 3065 */ 3066 /* ARGSUSED */ 3067 int 3068 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3069 { 3070 /* { 3071 syscallarg(const char *) path; 3072 syscallarg(struct stat *) ub; 3073 } */ 3074 struct stat sb; 3075 int error; 3076 3077 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3078 if (error) 3079 return error; 3080 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3081 } 3082 3083 int 3084 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3085 register_t *retval) 3086 { 3087 /* { 3088 syscallarg(int) fd; 3089 syscallarg(const char *) path; 3090 syscallarg(struct stat *) buf; 3091 syscallarg(int) flag; 3092 } */ 3093 unsigned int nd_flag; 3094 struct stat sb; 3095 int error; 3096 3097 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3098 nd_flag = NOFOLLOW; 3099 else 3100 nd_flag = FOLLOW; 3101 3102 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3103 &sb); 3104 if (error) 3105 return error; 3106 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3107 } 3108 3109 /* 3110 * Get configurable pathname variables. 3111 */ 3112 /* ARGSUSED */ 3113 int 3114 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3115 { 3116 /* { 3117 syscallarg(const char *) path; 3118 syscallarg(int) name; 3119 } */ 3120 int error; 3121 struct pathbuf *pb; 3122 struct nameidata nd; 3123 3124 error = pathbuf_copyin(SCARG(uap, path), &pb); 3125 if (error) { 3126 return error; 3127 } 3128 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3129 if ((error = namei(&nd)) != 0) { 3130 pathbuf_destroy(pb); 3131 return (error); 3132 } 3133 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3134 vput(nd.ni_vp); 3135 pathbuf_destroy(pb); 3136 return (error); 3137 } 3138 3139 /* 3140 * Return target name of a symbolic link. 3141 */ 3142 /* ARGSUSED */ 3143 int 3144 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3145 register_t *retval) 3146 { 3147 /* { 3148 syscallarg(const char *) path; 3149 syscallarg(char *) buf; 3150 syscallarg(size_t) count; 3151 } */ 3152 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3153 SCARG(uap, buf), SCARG(uap, count), retval); 3154 } 3155 3156 static int 3157 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3158 size_t count, register_t *retval) 3159 { 3160 struct vnode *vp; 3161 struct iovec aiov; 3162 struct uio auio; 3163 int error; 3164 struct pathbuf *pb; 3165 struct nameidata nd; 3166 3167 error = pathbuf_copyin(path, &pb); 3168 if (error) { 3169 return error; 3170 } 3171 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3172 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3173 pathbuf_destroy(pb); 3174 return error; 3175 } 3176 vp = nd.ni_vp; 3177 pathbuf_destroy(pb); 3178 if (vp->v_type != VLNK) 3179 error = EINVAL; 3180 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3181 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3182 aiov.iov_base = buf; 3183 aiov.iov_len = count; 3184 auio.uio_iov = &aiov; 3185 auio.uio_iovcnt = 1; 3186 auio.uio_offset = 0; 3187 auio.uio_rw = UIO_READ; 3188 KASSERT(l == curlwp); 3189 auio.uio_vmspace = l->l_proc->p_vmspace; 3190 auio.uio_resid = count; 3191 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3192 *retval = count - auio.uio_resid; 3193 } 3194 vput(vp); 3195 return (error); 3196 } 3197 3198 int 3199 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3200 register_t *retval) 3201 { 3202 /* { 3203 syscallarg(int) fd; 3204 syscallarg(const char *) path; 3205 syscallarg(char *) buf; 3206 syscallarg(size_t) bufsize; 3207 } */ 3208 3209 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3210 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3211 } 3212 3213 /* 3214 * Change flags of a file given a path name. 3215 */ 3216 /* ARGSUSED */ 3217 int 3218 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3219 { 3220 /* { 3221 syscallarg(const char *) path; 3222 syscallarg(u_long) flags; 3223 } */ 3224 struct vnode *vp; 3225 int error; 3226 3227 error = namei_simple_user(SCARG(uap, path), 3228 NSM_FOLLOW_TRYEMULROOT, &vp); 3229 if (error != 0) 3230 return (error); 3231 error = change_flags(vp, SCARG(uap, flags), l); 3232 vput(vp); 3233 return (error); 3234 } 3235 3236 /* 3237 * Change flags of a file given a file descriptor. 3238 */ 3239 /* ARGSUSED */ 3240 int 3241 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3242 { 3243 /* { 3244 syscallarg(int) fd; 3245 syscallarg(u_long) flags; 3246 } */ 3247 struct vnode *vp; 3248 file_t *fp; 3249 int error; 3250 3251 /* fd_getvnode() will use the descriptor for us */ 3252 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3253 return (error); 3254 vp = fp->f_data; 3255 error = change_flags(vp, SCARG(uap, flags), l); 3256 VOP_UNLOCK(vp); 3257 fd_putfile(SCARG(uap, fd)); 3258 return (error); 3259 } 3260 3261 /* 3262 * Change flags of a file given a path name; this version does 3263 * not follow links. 3264 */ 3265 int 3266 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3267 { 3268 /* { 3269 syscallarg(const char *) path; 3270 syscallarg(u_long) flags; 3271 } */ 3272 struct vnode *vp; 3273 int error; 3274 3275 error = namei_simple_user(SCARG(uap, path), 3276 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3277 if (error != 0) 3278 return (error); 3279 error = change_flags(vp, SCARG(uap, flags), l); 3280 vput(vp); 3281 return (error); 3282 } 3283 3284 /* 3285 * Common routine to change flags of a file. 3286 */ 3287 int 3288 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3289 { 3290 struct vattr vattr; 3291 int error; 3292 3293 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3294 3295 vattr_null(&vattr); 3296 vattr.va_flags = flags; 3297 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3298 3299 return (error); 3300 } 3301 3302 /* 3303 * Change mode of a file given path name; this version follows links. 3304 */ 3305 /* ARGSUSED */ 3306 int 3307 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3308 { 3309 /* { 3310 syscallarg(const char *) path; 3311 syscallarg(int) mode; 3312 } */ 3313 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3314 SCARG(uap, mode), 0); 3315 } 3316 3317 int 3318 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3319 { 3320 int error; 3321 struct vnode *vp; 3322 namei_simple_flags_t ns_flag; 3323 3324 if (flags & AT_SYMLINK_NOFOLLOW) 3325 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3326 else 3327 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3328 3329 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3330 if (error != 0) 3331 return error; 3332 3333 error = change_mode(vp, mode, l); 3334 3335 vrele(vp); 3336 3337 return (error); 3338 } 3339 3340 /* 3341 * Change mode of a file given a file descriptor. 3342 */ 3343 /* ARGSUSED */ 3344 int 3345 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3346 { 3347 /* { 3348 syscallarg(int) fd; 3349 syscallarg(int) mode; 3350 } */ 3351 file_t *fp; 3352 int error; 3353 3354 /* fd_getvnode() will use the descriptor for us */ 3355 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3356 return (error); 3357 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3358 fd_putfile(SCARG(uap, fd)); 3359 return (error); 3360 } 3361 3362 int 3363 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3364 register_t *retval) 3365 { 3366 /* { 3367 syscallarg(int) fd; 3368 syscallarg(const char *) path; 3369 syscallarg(int) mode; 3370 syscallarg(int) flag; 3371 } */ 3372 3373 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3374 SCARG(uap, mode), SCARG(uap, flag)); 3375 } 3376 3377 /* 3378 * Change mode of a file given path name; this version does not follow links. 3379 */ 3380 /* ARGSUSED */ 3381 int 3382 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3383 { 3384 /* { 3385 syscallarg(const char *) path; 3386 syscallarg(int) mode; 3387 } */ 3388 int error; 3389 struct vnode *vp; 3390 3391 error = namei_simple_user(SCARG(uap, path), 3392 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3393 if (error != 0) 3394 return (error); 3395 3396 error = change_mode(vp, SCARG(uap, mode), l); 3397 3398 vrele(vp); 3399 return (error); 3400 } 3401 3402 /* 3403 * Common routine to set mode given a vnode. 3404 */ 3405 static int 3406 change_mode(struct vnode *vp, int mode, struct lwp *l) 3407 { 3408 struct vattr vattr; 3409 int error; 3410 3411 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3412 vattr_null(&vattr); 3413 vattr.va_mode = mode & ALLPERMS; 3414 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3415 VOP_UNLOCK(vp); 3416 return (error); 3417 } 3418 3419 /* 3420 * Set ownership given a path name; this version follows links. 3421 */ 3422 /* ARGSUSED */ 3423 int 3424 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3425 { 3426 /* { 3427 syscallarg(const char *) path; 3428 syscallarg(uid_t) uid; 3429 syscallarg(gid_t) gid; 3430 } */ 3431 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3432 SCARG(uap, gid), 0); 3433 } 3434 3435 int 3436 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3437 gid_t gid, int flags) 3438 { 3439 int error; 3440 struct vnode *vp; 3441 namei_simple_flags_t ns_flag; 3442 3443 if (flags & AT_SYMLINK_NOFOLLOW) 3444 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3445 else 3446 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3447 3448 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3449 if (error != 0) 3450 return error; 3451 3452 error = change_owner(vp, uid, gid, l, 0); 3453 3454 vrele(vp); 3455 3456 return (error); 3457 } 3458 3459 /* 3460 * Set ownership given a path name; this version follows links. 3461 * Provides POSIX semantics. 3462 */ 3463 /* ARGSUSED */ 3464 int 3465 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3466 { 3467 /* { 3468 syscallarg(const char *) path; 3469 syscallarg(uid_t) uid; 3470 syscallarg(gid_t) gid; 3471 } */ 3472 int error; 3473 struct vnode *vp; 3474 3475 error = namei_simple_user(SCARG(uap, path), 3476 NSM_FOLLOW_TRYEMULROOT, &vp); 3477 if (error != 0) 3478 return (error); 3479 3480 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3481 3482 vrele(vp); 3483 return (error); 3484 } 3485 3486 /* 3487 * Set ownership given a file descriptor. 3488 */ 3489 /* ARGSUSED */ 3490 int 3491 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3492 { 3493 /* { 3494 syscallarg(int) fd; 3495 syscallarg(uid_t) uid; 3496 syscallarg(gid_t) gid; 3497 } */ 3498 int error; 3499 file_t *fp; 3500 3501 /* fd_getvnode() will use the descriptor for us */ 3502 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3503 return (error); 3504 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3505 l, 0); 3506 fd_putfile(SCARG(uap, fd)); 3507 return (error); 3508 } 3509 3510 int 3511 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3512 register_t *retval) 3513 { 3514 /* { 3515 syscallarg(int) fd; 3516 syscallarg(const char *) path; 3517 syscallarg(uid_t) owner; 3518 syscallarg(gid_t) group; 3519 syscallarg(int) flag; 3520 } */ 3521 3522 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3523 SCARG(uap, owner), SCARG(uap, group), 3524 SCARG(uap, flag)); 3525 } 3526 3527 /* 3528 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3529 */ 3530 /* ARGSUSED */ 3531 int 3532 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3533 { 3534 /* { 3535 syscallarg(int) fd; 3536 syscallarg(uid_t) uid; 3537 syscallarg(gid_t) gid; 3538 } */ 3539 int error; 3540 file_t *fp; 3541 3542 /* fd_getvnode() will use the descriptor for us */ 3543 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3544 return (error); 3545 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3546 l, 1); 3547 fd_putfile(SCARG(uap, fd)); 3548 return (error); 3549 } 3550 3551 /* 3552 * Set ownership given a path name; this version does not follow links. 3553 */ 3554 /* ARGSUSED */ 3555 int 3556 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3557 { 3558 /* { 3559 syscallarg(const char *) path; 3560 syscallarg(uid_t) uid; 3561 syscallarg(gid_t) gid; 3562 } */ 3563 int error; 3564 struct vnode *vp; 3565 3566 error = namei_simple_user(SCARG(uap, path), 3567 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3568 if (error != 0) 3569 return (error); 3570 3571 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3572 3573 vrele(vp); 3574 return (error); 3575 } 3576 3577 /* 3578 * Set ownership given a path name; this version does not follow links. 3579 * Provides POSIX/XPG semantics. 3580 */ 3581 /* ARGSUSED */ 3582 int 3583 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3584 { 3585 /* { 3586 syscallarg(const char *) path; 3587 syscallarg(uid_t) uid; 3588 syscallarg(gid_t) gid; 3589 } */ 3590 int error; 3591 struct vnode *vp; 3592 3593 error = namei_simple_user(SCARG(uap, path), 3594 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3595 if (error != 0) 3596 return (error); 3597 3598 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3599 3600 vrele(vp); 3601 return (error); 3602 } 3603 3604 /* 3605 * Common routine to set ownership given a vnode. 3606 */ 3607 static int 3608 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3609 int posix_semantics) 3610 { 3611 struct vattr vattr; 3612 mode_t newmode; 3613 int error; 3614 3615 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3616 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3617 goto out; 3618 3619 #define CHANGED(x) ((int)(x) != -1) 3620 newmode = vattr.va_mode; 3621 if (posix_semantics) { 3622 /* 3623 * POSIX/XPG semantics: if the caller is not the super-user, 3624 * clear set-user-id and set-group-id bits. Both POSIX and 3625 * the XPG consider the behaviour for calls by the super-user 3626 * implementation-defined; we leave the set-user-id and set- 3627 * group-id settings intact in that case. 3628 */ 3629 if (vattr.va_mode & S_ISUID) { 3630 if (kauth_authorize_vnode(l->l_cred, 3631 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3632 newmode &= ~S_ISUID; 3633 } 3634 if (vattr.va_mode & S_ISGID) { 3635 if (kauth_authorize_vnode(l->l_cred, 3636 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3637 newmode &= ~S_ISGID; 3638 } 3639 } else { 3640 /* 3641 * NetBSD semantics: when changing owner and/or group, 3642 * clear the respective bit(s). 3643 */ 3644 if (CHANGED(uid)) 3645 newmode &= ~S_ISUID; 3646 if (CHANGED(gid)) 3647 newmode &= ~S_ISGID; 3648 } 3649 /* Update va_mode iff altered. */ 3650 if (vattr.va_mode == newmode) 3651 newmode = VNOVAL; 3652 3653 vattr_null(&vattr); 3654 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3655 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3656 vattr.va_mode = newmode; 3657 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3658 #undef CHANGED 3659 3660 out: 3661 VOP_UNLOCK(vp); 3662 return (error); 3663 } 3664 3665 /* 3666 * Set the access and modification times given a path name; this 3667 * version follows links. 3668 */ 3669 /* ARGSUSED */ 3670 int 3671 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3672 register_t *retval) 3673 { 3674 /* { 3675 syscallarg(const char *) path; 3676 syscallarg(const struct timeval *) tptr; 3677 } */ 3678 3679 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3680 SCARG(uap, tptr), UIO_USERSPACE); 3681 } 3682 3683 /* 3684 * Set the access and modification times given a file descriptor. 3685 */ 3686 /* ARGSUSED */ 3687 int 3688 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3689 register_t *retval) 3690 { 3691 /* { 3692 syscallarg(int) fd; 3693 syscallarg(const struct timeval *) tptr; 3694 } */ 3695 int error; 3696 file_t *fp; 3697 3698 /* fd_getvnode() will use the descriptor for us */ 3699 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3700 return (error); 3701 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3702 UIO_USERSPACE); 3703 fd_putfile(SCARG(uap, fd)); 3704 return (error); 3705 } 3706 3707 int 3708 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3709 register_t *retval) 3710 { 3711 /* { 3712 syscallarg(int) fd; 3713 syscallarg(const struct timespec *) tptr; 3714 } */ 3715 int error; 3716 file_t *fp; 3717 3718 /* fd_getvnode() will use the descriptor for us */ 3719 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3720 return (error); 3721 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0, 3722 SCARG(uap, tptr), UIO_USERSPACE); 3723 fd_putfile(SCARG(uap, fd)); 3724 return (error); 3725 } 3726 3727 /* 3728 * Set the access and modification times given a path name; this 3729 * version does not follow links. 3730 */ 3731 int 3732 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3733 register_t *retval) 3734 { 3735 /* { 3736 syscallarg(const char *) path; 3737 syscallarg(const struct timeval *) tptr; 3738 } */ 3739 3740 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3741 SCARG(uap, tptr), UIO_USERSPACE); 3742 } 3743 3744 int 3745 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3746 register_t *retval) 3747 { 3748 /* { 3749 syscallarg(int) fd; 3750 syscallarg(const char *) path; 3751 syscallarg(const struct timespec *) tptr; 3752 syscallarg(int) flag; 3753 } */ 3754 int follow; 3755 const struct timespec *tptr; 3756 int error; 3757 3758 tptr = SCARG(uap, tptr); 3759 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3760 3761 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3762 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3763 3764 return error; 3765 } 3766 3767 /* 3768 * Common routine to set access and modification times given a vnode. 3769 */ 3770 int 3771 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3772 const struct timespec *tptr, enum uio_seg seg) 3773 { 3774 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3775 } 3776 3777 int 3778 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3779 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3780 { 3781 struct vattr vattr; 3782 int error, dorele = 0; 3783 namei_simple_flags_t sflags; 3784 bool vanull, setbirthtime; 3785 struct timespec ts[2]; 3786 3787 KASSERT(l != NULL || fdat == AT_FDCWD); 3788 3789 /* 3790 * I have checked all callers and they pass either FOLLOW, 3791 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3792 * is 0. More to the point, they don't pass anything else. 3793 * Let's keep it that way at least until the namei interfaces 3794 * are fully sanitized. 3795 */ 3796 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3797 sflags = (flag == FOLLOW) ? 3798 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3799 3800 if (tptr == NULL) { 3801 vanull = true; 3802 nanotime(&ts[0]); 3803 ts[1] = ts[0]; 3804 } else { 3805 vanull = false; 3806 if (seg != UIO_SYSSPACE) { 3807 error = copyin(tptr, ts, sizeof (ts)); 3808 if (error != 0) 3809 return error; 3810 } else { 3811 ts[0] = tptr[0]; 3812 ts[1] = tptr[1]; 3813 } 3814 } 3815 3816 if (ts[0].tv_nsec == UTIME_NOW) { 3817 nanotime(&ts[0]); 3818 if (ts[1].tv_nsec == UTIME_NOW) { 3819 vanull = true; 3820 ts[1] = ts[0]; 3821 } 3822 } else if (ts[1].tv_nsec == UTIME_NOW) 3823 nanotime(&ts[1]); 3824 3825 if (vp == NULL) { 3826 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3827 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3828 if (error != 0) 3829 return error; 3830 dorele = 1; 3831 } 3832 3833 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3834 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3835 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3836 vattr_null(&vattr); 3837 3838 if (ts[0].tv_nsec != UTIME_OMIT) 3839 vattr.va_atime = ts[0]; 3840 3841 if (ts[1].tv_nsec != UTIME_OMIT) { 3842 vattr.va_mtime = ts[1]; 3843 if (setbirthtime) 3844 vattr.va_birthtime = ts[1]; 3845 } 3846 3847 if (vanull) 3848 vattr.va_vaflags |= VA_UTIMES_NULL; 3849 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3850 VOP_UNLOCK(vp); 3851 3852 if (dorele != 0) 3853 vrele(vp); 3854 3855 return error; 3856 } 3857 3858 int 3859 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3860 const struct timeval *tptr, enum uio_seg seg) 3861 { 3862 struct timespec ts[2]; 3863 struct timespec *tsptr = NULL; 3864 int error; 3865 3866 if (tptr != NULL) { 3867 struct timeval tv[2]; 3868 3869 if (seg != UIO_SYSSPACE) { 3870 error = copyin(tptr, tv, sizeof (tv)); 3871 if (error != 0) 3872 return error; 3873 tptr = tv; 3874 } 3875 3876 if ((tv[0].tv_usec == UTIME_NOW) || 3877 (tv[0].tv_usec == UTIME_OMIT)) 3878 ts[0].tv_nsec = tv[0].tv_usec; 3879 else 3880 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3881 3882 if ((tv[1].tv_usec == UTIME_NOW) || 3883 (tv[1].tv_usec == UTIME_OMIT)) 3884 ts[1].tv_nsec = tv[1].tv_usec; 3885 else 3886 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3887 3888 tsptr = &ts[0]; 3889 } 3890 3891 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3892 } 3893 3894 /* 3895 * Truncate a file given its path name. 3896 */ 3897 /* ARGSUSED */ 3898 int 3899 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3900 { 3901 /* { 3902 syscallarg(const char *) path; 3903 syscallarg(int) pad; 3904 syscallarg(off_t) length; 3905 } */ 3906 struct vnode *vp; 3907 struct vattr vattr; 3908 int error; 3909 3910 error = namei_simple_user(SCARG(uap, path), 3911 NSM_FOLLOW_TRYEMULROOT, &vp); 3912 if (error != 0) 3913 return (error); 3914 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3915 if (vp->v_type == VDIR) 3916 error = EISDIR; 3917 else if ((error = vn_writechk(vp)) == 0 && 3918 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3919 vattr_null(&vattr); 3920 vattr.va_size = SCARG(uap, length); 3921 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3922 } 3923 vput(vp); 3924 return (error); 3925 } 3926 3927 /* 3928 * Truncate a file given a file descriptor. 3929 */ 3930 /* ARGSUSED */ 3931 int 3932 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3933 { 3934 /* { 3935 syscallarg(int) fd; 3936 syscallarg(int) pad; 3937 syscallarg(off_t) length; 3938 } */ 3939 struct vattr vattr; 3940 struct vnode *vp; 3941 file_t *fp; 3942 int error; 3943 3944 /* fd_getvnode() will use the descriptor for us */ 3945 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3946 return (error); 3947 if ((fp->f_flag & FWRITE) == 0) { 3948 error = EINVAL; 3949 goto out; 3950 } 3951 vp = fp->f_data; 3952 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3953 if (vp->v_type == VDIR) 3954 error = EISDIR; 3955 else if ((error = vn_writechk(vp)) == 0) { 3956 vattr_null(&vattr); 3957 vattr.va_size = SCARG(uap, length); 3958 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3959 } 3960 VOP_UNLOCK(vp); 3961 out: 3962 fd_putfile(SCARG(uap, fd)); 3963 return (error); 3964 } 3965 3966 /* 3967 * Sync an open file. 3968 */ 3969 /* ARGSUSED */ 3970 int 3971 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3972 { 3973 /* { 3974 syscallarg(int) fd; 3975 } */ 3976 struct vnode *vp; 3977 file_t *fp; 3978 int error; 3979 3980 /* fd_getvnode() will use the descriptor for us */ 3981 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3982 return (error); 3983 vp = fp->f_data; 3984 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3985 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3986 VOP_UNLOCK(vp); 3987 fd_putfile(SCARG(uap, fd)); 3988 return (error); 3989 } 3990 3991 /* 3992 * Sync a range of file data. API modeled after that found in AIX. 3993 * 3994 * FDATASYNC indicates that we need only save enough metadata to be able 3995 * to re-read the written data. Note we duplicate AIX's requirement that 3996 * the file be open for writing. 3997 */ 3998 /* ARGSUSED */ 3999 int 4000 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4001 { 4002 /* { 4003 syscallarg(int) fd; 4004 syscallarg(int) flags; 4005 syscallarg(off_t) start; 4006 syscallarg(off_t) length; 4007 } */ 4008 struct vnode *vp; 4009 file_t *fp; 4010 int flags, nflags; 4011 off_t s, e, len; 4012 int error; 4013 4014 /* fd_getvnode() will use the descriptor for us */ 4015 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4016 return (error); 4017 4018 if ((fp->f_flag & FWRITE) == 0) { 4019 error = EBADF; 4020 goto out; 4021 } 4022 4023 flags = SCARG(uap, flags); 4024 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4025 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4026 error = EINVAL; 4027 goto out; 4028 } 4029 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4030 if (flags & FDATASYNC) 4031 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4032 else 4033 nflags = FSYNC_WAIT; 4034 if (flags & FDISKSYNC) 4035 nflags |= FSYNC_CACHE; 4036 4037 len = SCARG(uap, length); 4038 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4039 if (len) { 4040 s = SCARG(uap, start); 4041 e = s + len; 4042 if (e < s) { 4043 error = EINVAL; 4044 goto out; 4045 } 4046 } else { 4047 e = 0; 4048 s = 0; 4049 } 4050 4051 vp = fp->f_data; 4052 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4053 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4054 VOP_UNLOCK(vp); 4055 out: 4056 fd_putfile(SCARG(uap, fd)); 4057 return (error); 4058 } 4059 4060 /* 4061 * Sync the data of an open file. 4062 */ 4063 /* ARGSUSED */ 4064 int 4065 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4066 { 4067 /* { 4068 syscallarg(int) fd; 4069 } */ 4070 struct vnode *vp; 4071 file_t *fp; 4072 int error; 4073 4074 /* fd_getvnode() will use the descriptor for us */ 4075 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4076 return (error); 4077 if ((fp->f_flag & FWRITE) == 0) { 4078 fd_putfile(SCARG(uap, fd)); 4079 return (EBADF); 4080 } 4081 vp = fp->f_data; 4082 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4083 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4084 VOP_UNLOCK(vp); 4085 fd_putfile(SCARG(uap, fd)); 4086 return (error); 4087 } 4088 4089 /* 4090 * Rename files, (standard) BSD semantics frontend. 4091 */ 4092 /* ARGSUSED */ 4093 int 4094 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4095 { 4096 /* { 4097 syscallarg(const char *) from; 4098 syscallarg(const char *) to; 4099 } */ 4100 4101 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4102 SCARG(uap, to), UIO_USERSPACE, 0)); 4103 } 4104 4105 int 4106 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4107 register_t *retval) 4108 { 4109 /* { 4110 syscallarg(int) fromfd; 4111 syscallarg(const char *) from; 4112 syscallarg(int) tofd; 4113 syscallarg(const char *) to; 4114 } */ 4115 4116 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4117 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4118 } 4119 4120 /* 4121 * Rename files, POSIX semantics frontend. 4122 */ 4123 /* ARGSUSED */ 4124 int 4125 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4126 { 4127 /* { 4128 syscallarg(const char *) from; 4129 syscallarg(const char *) to; 4130 } */ 4131 4132 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4133 SCARG(uap, to), UIO_USERSPACE, 1)); 4134 } 4135 4136 /* 4137 * Rename files. Source and destination must either both be directories, 4138 * or both not be directories. If target is a directory, it must be empty. 4139 * If `from' and `to' refer to the same object, the value of the `retain' 4140 * argument is used to determine whether `from' will be 4141 * 4142 * (retain == 0) deleted unless `from' and `to' refer to the same 4143 * object in the file system's name space (BSD). 4144 * (retain == 1) always retained (POSIX). 4145 * 4146 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4147 */ 4148 int 4149 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4150 { 4151 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4152 } 4153 4154 static int 4155 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4156 const char *to, enum uio_seg seg, int retain) 4157 { 4158 struct pathbuf *fpb, *tpb; 4159 struct nameidata fnd, tnd; 4160 struct vnode *fdvp, *fvp; 4161 struct vnode *tdvp, *tvp; 4162 struct mount *mp, *tmp; 4163 int error; 4164 4165 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4166 4167 error = pathbuf_maybe_copyin(from, seg, &fpb); 4168 if (error) 4169 goto out0; 4170 KASSERT(fpb != NULL); 4171 4172 error = pathbuf_maybe_copyin(to, seg, &tpb); 4173 if (error) 4174 goto out1; 4175 KASSERT(tpb != NULL); 4176 4177 /* 4178 * Lookup from. 4179 * 4180 * XXX LOCKPARENT is wrong because we don't actually want it 4181 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4182 * insane, so for the time being we need to leave it like this. 4183 */ 4184 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4185 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4186 goto out2; 4187 4188 /* 4189 * Pull out the important results of the lookup, fdvp and fvp. 4190 * Of course, fvp is bogus because we're about to unlock fdvp. 4191 */ 4192 fdvp = fnd.ni_dvp; 4193 fvp = fnd.ni_vp; 4194 KASSERT(fdvp != NULL); 4195 KASSERT(fvp != NULL); 4196 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4197 4198 /* 4199 * Make sure neither fdvp nor fvp is locked. 4200 */ 4201 if (fdvp != fvp) 4202 VOP_UNLOCK(fdvp); 4203 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4204 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4205 4206 /* 4207 * Reject renaming `.' and `..'. Can't do this until after 4208 * namei because we need namei's parsing to find the final 4209 * component name. (namei should just leave us with the final 4210 * component name and not look it up itself, but anyway...) 4211 * 4212 * This was here before because we used to relookup from 4213 * instead of to and relookup requires the caller to check 4214 * this, but now file systems may depend on this check, so we 4215 * must retain it until the file systems are all rototilled. 4216 */ 4217 if (((fnd.ni_cnd.cn_namelen == 1) && 4218 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4219 ((fnd.ni_cnd.cn_namelen == 2) && 4220 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4221 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4222 error = EINVAL; /* XXX EISDIR? */ 4223 goto abort0; 4224 } 4225 4226 /* 4227 * Lookup to. 4228 * 4229 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4230 * fvp here to decide whether to add CREATEDIR is a load of 4231 * bollocks because fvp might be the wrong node by now, since 4232 * fdvp is unlocked. 4233 * 4234 * XXX Why not pass CREATEDIR always? 4235 */ 4236 NDINIT(&tnd, RENAME, 4237 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4238 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4239 tpb); 4240 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4241 goto abort0; 4242 4243 /* 4244 * Pull out the important results of the lookup, tdvp and tvp. 4245 * Of course, tvp is bogus because we're about to unlock tdvp. 4246 */ 4247 tdvp = tnd.ni_dvp; 4248 tvp = tnd.ni_vp; 4249 KASSERT(tdvp != NULL); 4250 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4251 4252 /* 4253 * Make sure neither tdvp nor tvp is locked. 4254 */ 4255 if (tdvp != tvp) 4256 VOP_UNLOCK(tdvp); 4257 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4258 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4259 4260 /* 4261 * Reject renaming onto `.' or `..'. relookup is unhappy with 4262 * these, which is why we must do this here. Once upon a time 4263 * we relooked up from instead of to, and consequently didn't 4264 * need this check, but now that we relookup to instead of 4265 * from, we need this; and we shall need it forever forward 4266 * until the VOP_RENAME protocol changes, because file systems 4267 * will no doubt begin to depend on this check. 4268 */ 4269 if (((tnd.ni_cnd.cn_namelen == 1) && 4270 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4271 ((tnd.ni_cnd.cn_namelen == 2) && 4272 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4273 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4274 error = EINVAL; /* XXX EISDIR? */ 4275 goto abort1; 4276 } 4277 4278 /* 4279 * Get the mount point. If the file system has been unmounted, 4280 * which it may be because we're not holding any vnode locks, 4281 * then v_mount will be NULL. We're not really supposed to 4282 * read v_mount without holding the vnode lock, but since we 4283 * have fdvp referenced, if fdvp->v_mount changes then at worst 4284 * it will be set to NULL, not changed to another mount point. 4285 * And, of course, since it is up to the file system to 4286 * determine the real lock order, we can't lock both fdvp and 4287 * tdvp at the same time. 4288 */ 4289 mp = fdvp->v_mount; 4290 if (mp == NULL) { 4291 error = ENOENT; 4292 goto abort1; 4293 } 4294 4295 /* 4296 * Make sure the mount points match. Again, although we don't 4297 * hold any vnode locks, the v_mount fields may change -- but 4298 * at worst they will change to NULL, so this will never become 4299 * a cross-device rename, because we hold vnode references. 4300 * 4301 * XXX Because nothing is locked and the compiler may reorder 4302 * things here, unmounting the file system at an inopportune 4303 * moment may cause rename to fail with ENXDEV when it really 4304 * should fail with ENOENT. 4305 */ 4306 tmp = tdvp->v_mount; 4307 if (tmp == NULL) { 4308 error = ENOENT; 4309 goto abort1; 4310 } 4311 4312 if (mp != tmp) { 4313 error = EXDEV; 4314 goto abort1; 4315 } 4316 4317 /* 4318 * Take the vfs rename lock to avoid cross-directory screw cases. 4319 * Nothing is locked currently, so taking this lock is safe. 4320 */ 4321 error = VFS_RENAMELOCK_ENTER(mp); 4322 if (error) 4323 goto abort1; 4324 4325 /* 4326 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4327 * and nothing is locked except for the vfs rename lock. 4328 * 4329 * The next step is a little rain dance to conform to the 4330 * insane lock protocol, even though it does nothing to ward 4331 * off race conditions. 4332 * 4333 * We need tdvp and tvp to be locked. However, because we have 4334 * unlocked tdvp in order to hold no locks while we take the 4335 * vfs rename lock, tvp may be wrong here, and we can't safely 4336 * lock it even if the sensible file systems will just unlock 4337 * it straight away. Consequently, we must lock tdvp and then 4338 * relookup tvp to get it locked. 4339 * 4340 * Finally, because the VOP_RENAME protocol is brain-damaged 4341 * and various file systems insanely depend on the semantics of 4342 * this brain damage, the lookup of to must be the last lookup 4343 * before VOP_RENAME. 4344 */ 4345 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4346 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4347 if (error) 4348 goto abort2; 4349 4350 /* 4351 * Drop the old tvp and pick up the new one -- which might be 4352 * the same, but that doesn't matter to us. After this, tdvp 4353 * and tvp should both be locked. 4354 */ 4355 if (tvp != NULL) 4356 vrele(tvp); 4357 tvp = tnd.ni_vp; 4358 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4359 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4360 4361 /* 4362 * The old do_sys_rename had various consistency checks here 4363 * involving fvp and tvp. fvp is bogus already here, and tvp 4364 * will become bogus soon in any sensible file system, so the 4365 * only purpose in putting these checks here is to give lip 4366 * service to these screw cases and to acknowledge that they 4367 * exist, not actually to handle them, but here you go 4368 * anyway... 4369 */ 4370 4371 /* 4372 * Acknowledge that directories and non-directories aren't 4373 * suposed to mix. 4374 */ 4375 if (tvp != NULL) { 4376 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4377 error = ENOTDIR; 4378 goto abort3; 4379 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4380 error = EISDIR; 4381 goto abort3; 4382 } 4383 } 4384 4385 /* 4386 * Acknowledge some random screw case, among the dozens that 4387 * might arise. 4388 */ 4389 if (fvp == tdvp) { 4390 error = EINVAL; 4391 goto abort3; 4392 } 4393 4394 /* 4395 * Acknowledge that POSIX has a wacky screw case. 4396 * 4397 * XXX Eventually the retain flag needs to be passed on to 4398 * VOP_RENAME. 4399 */ 4400 if (fvp == tvp) { 4401 if (retain) { 4402 error = 0; 4403 goto abort3; 4404 } else if ((fdvp == tdvp) && 4405 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4406 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4407 fnd.ni_cnd.cn_namelen))) { 4408 error = 0; 4409 goto abort3; 4410 } 4411 } 4412 4413 /* 4414 * Make sure veriexec can screw us up. (But a race can screw 4415 * up veriexec, of course -- remember, fvp and (soon) tvp are 4416 * bogus.) 4417 */ 4418 #if NVERIEXEC > 0 4419 { 4420 char *f1, *f2; 4421 size_t f1_len; 4422 size_t f2_len; 4423 4424 f1_len = fnd.ni_cnd.cn_namelen + 1; 4425 f1 = kmem_alloc(f1_len, KM_SLEEP); 4426 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4427 4428 f2_len = tnd.ni_cnd.cn_namelen + 1; 4429 f2 = kmem_alloc(f2_len, KM_SLEEP); 4430 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4431 4432 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4433 4434 kmem_free(f1, f1_len); 4435 kmem_free(f2, f2_len); 4436 4437 if (error) 4438 goto abort3; 4439 } 4440 #endif /* NVERIEXEC > 0 */ 4441 4442 /* 4443 * All ready. Incant the rename vop. 4444 */ 4445 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4446 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4447 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4448 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4449 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4450 4451 /* 4452 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4453 * tdvp and tvp. But we can't assert any of that. 4454 */ 4455 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4456 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4457 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4458 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4459 4460 /* 4461 * So all we have left to do is to drop the rename lock and 4462 * destroy the pathbufs. 4463 */ 4464 VFS_RENAMELOCK_EXIT(mp); 4465 goto out2; 4466 4467 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4468 VOP_UNLOCK(tvp); 4469 abort2: VOP_UNLOCK(tdvp); 4470 VFS_RENAMELOCK_EXIT(mp); 4471 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4472 vrele(tdvp); 4473 if (tvp != NULL) 4474 vrele(tvp); 4475 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4476 vrele(fdvp); 4477 vrele(fvp); 4478 out2: pathbuf_destroy(tpb); 4479 out1: pathbuf_destroy(fpb); 4480 out0: return error; 4481 } 4482 4483 /* 4484 * Make a directory file. 4485 */ 4486 /* ARGSUSED */ 4487 int 4488 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4489 { 4490 /* { 4491 syscallarg(const char *) path; 4492 syscallarg(int) mode; 4493 } */ 4494 4495 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4496 SCARG(uap, mode), UIO_USERSPACE); 4497 } 4498 4499 int 4500 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4501 register_t *retval) 4502 { 4503 /* { 4504 syscallarg(int) fd; 4505 syscallarg(const char *) path; 4506 syscallarg(int) mode; 4507 } */ 4508 4509 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4510 SCARG(uap, mode), UIO_USERSPACE); 4511 } 4512 4513 4514 int 4515 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4516 { 4517 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4518 } 4519 4520 static int 4521 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4522 enum uio_seg seg) 4523 { 4524 struct proc *p = curlwp->l_proc; 4525 struct vnode *vp; 4526 struct vattr vattr; 4527 int error; 4528 struct pathbuf *pb; 4529 struct nameidata nd; 4530 4531 KASSERT(l != NULL || fdat == AT_FDCWD); 4532 4533 /* XXX bollocks, should pass in a pathbuf */ 4534 error = pathbuf_maybe_copyin(path, seg, &pb); 4535 if (error) { 4536 return error; 4537 } 4538 4539 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4540 4541 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4542 pathbuf_destroy(pb); 4543 return (error); 4544 } 4545 vp = nd.ni_vp; 4546 if (vp != NULL) { 4547 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4548 if (nd.ni_dvp == vp) 4549 vrele(nd.ni_dvp); 4550 else 4551 vput(nd.ni_dvp); 4552 vrele(vp); 4553 pathbuf_destroy(pb); 4554 return (EEXIST); 4555 } 4556 vattr_null(&vattr); 4557 vattr.va_type = VDIR; 4558 /* We will read cwdi->cwdi_cmask unlocked. */ 4559 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4560 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4561 if (!error) 4562 vput(nd.ni_vp); 4563 pathbuf_destroy(pb); 4564 return (error); 4565 } 4566 4567 /* 4568 * Remove a directory file. 4569 */ 4570 /* ARGSUSED */ 4571 int 4572 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4573 { 4574 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4575 AT_REMOVEDIR, UIO_USERSPACE); 4576 } 4577 4578 /* 4579 * Read a block of directory entries in a file system independent format. 4580 */ 4581 int 4582 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4583 { 4584 /* { 4585 syscallarg(int) fd; 4586 syscallarg(char *) buf; 4587 syscallarg(size_t) count; 4588 } */ 4589 file_t *fp; 4590 int error, done; 4591 4592 /* fd_getvnode() will use the descriptor for us */ 4593 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4594 return (error); 4595 if ((fp->f_flag & FREAD) == 0) { 4596 error = EBADF; 4597 goto out; 4598 } 4599 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4600 SCARG(uap, count), &done, l, 0, 0); 4601 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4602 *retval = done; 4603 out: 4604 fd_putfile(SCARG(uap, fd)); 4605 return (error); 4606 } 4607 4608 /* 4609 * Set the mode mask for creation of filesystem nodes. 4610 */ 4611 int 4612 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4613 { 4614 /* { 4615 syscallarg(mode_t) newmask; 4616 } */ 4617 struct proc *p = l->l_proc; 4618 struct cwdinfo *cwdi; 4619 4620 /* 4621 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4622 * important is that we serialize changes to the mask. The 4623 * rw_exit() will issue a write memory barrier on our behalf, 4624 * and force the changes out to other CPUs (as it must use an 4625 * atomic operation, draining the local CPU's store buffers). 4626 */ 4627 cwdi = p->p_cwdi; 4628 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4629 *retval = cwdi->cwdi_cmask; 4630 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4631 rw_exit(&cwdi->cwdi_lock); 4632 4633 return (0); 4634 } 4635 4636 int 4637 dorevoke(struct vnode *vp, kauth_cred_t cred) 4638 { 4639 struct vattr vattr; 4640 int error, fs_decision; 4641 4642 vn_lock(vp, LK_SHARED | LK_RETRY); 4643 error = VOP_GETATTR(vp, &vattr, cred); 4644 VOP_UNLOCK(vp); 4645 if (error != 0) 4646 return error; 4647 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4648 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4649 fs_decision); 4650 if (!error) 4651 VOP_REVOKE(vp, REVOKEALL); 4652 return (error); 4653 } 4654 4655 /* 4656 * Void all references to file by ripping underlying filesystem 4657 * away from vnode. 4658 */ 4659 /* ARGSUSED */ 4660 int 4661 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4662 { 4663 /* { 4664 syscallarg(const char *) path; 4665 } */ 4666 struct vnode *vp; 4667 int error; 4668 4669 error = namei_simple_user(SCARG(uap, path), 4670 NSM_FOLLOW_TRYEMULROOT, &vp); 4671 if (error != 0) 4672 return (error); 4673 error = dorevoke(vp, l->l_cred); 4674 vrele(vp); 4675 return (error); 4676 } 4677