1 /* $NetBSD: vfs_syscalls.c,v 1.548 2020/05/16 18:31:50 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.548 2020/05/16 18:31:50 christos Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const u_int nmountcompatnames = __arraycount(mountcompatnames); 167 168 static int 169 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 170 { 171 file_t *dfp; 172 int error; 173 174 if (fdat != AT_FDCWD) { 175 if ((error = fd_getvnode(fdat, &dfp)) != 0) 176 goto out; 177 178 NDAT(ndp, dfp->f_vnode); 179 } 180 181 error = namei(ndp); 182 183 if (fdat != AT_FDCWD) 184 fd_putfile(fdat); 185 out: 186 return error; 187 } 188 189 static int 190 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 191 namei_simple_flags_t sflags, struct vnode **vp_ret) 192 { 193 file_t *dfp; 194 struct vnode *dvp; 195 int error; 196 197 if (fdat != AT_FDCWD) { 198 if ((error = fd_getvnode(fdat, &dfp)) != 0) 199 goto out; 200 201 dvp = dfp->f_vnode; 202 } else { 203 dvp = NULL; 204 } 205 206 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 207 208 if (fdat != AT_FDCWD) 209 fd_putfile(fdat); 210 out: 211 return error; 212 } 213 214 static int 215 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 216 { 217 int error; 218 219 fp->f_flag = flags & FMASK; 220 fp->f_type = DTYPE_VNODE; 221 fp->f_ops = &vnops; 222 fp->f_vnode = vp; 223 224 if (flags & (O_EXLOCK | O_SHLOCK)) { 225 struct flock lf; 226 int type; 227 228 lf.l_whence = SEEK_SET; 229 lf.l_start = 0; 230 lf.l_len = 0; 231 if (flags & O_EXLOCK) 232 lf.l_type = F_WRLCK; 233 else 234 lf.l_type = F_RDLCK; 235 type = F_FLOCK; 236 if ((flags & FNONBLOCK) == 0) 237 type |= F_WAIT; 238 VOP_UNLOCK(vp); 239 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 240 if (error) { 241 (void) vn_close(vp, fp->f_flag, fp->f_cred); 242 fd_abort(l->l_proc, fp, indx); 243 return error; 244 } 245 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 246 atomic_or_uint(&fp->f_flag, FHASLOCK); 247 } 248 if (flags & O_CLOEXEC) 249 fd_set_exclose(l, indx, true); 250 return 0; 251 } 252 253 static int 254 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 255 void *data, size_t *data_len) 256 { 257 struct mount *mp; 258 int error = 0, saved_flags; 259 260 mp = vp->v_mount; 261 saved_flags = mp->mnt_flag; 262 263 /* We can operate only on VV_ROOT nodes. */ 264 if ((vp->v_vflag & VV_ROOT) == 0) { 265 error = EINVAL; 266 goto out; 267 } 268 269 /* 270 * We only allow the filesystem to be reloaded if it 271 * is currently mounted read-only. Additionally, we 272 * prevent read-write to read-only downgrades. 273 */ 274 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 275 (mp->mnt_flag & MNT_RDONLY) == 0 && 276 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 277 error = EOPNOTSUPP; /* Needs translation */ 278 goto out; 279 } 280 281 /* 282 * Enabling MNT_UNION requires a covered mountpoint and 283 * must not happen on the root mount. 284 */ 285 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 286 error = EOPNOTSUPP; 287 goto out; 288 } 289 290 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 291 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 292 if (error) 293 goto out; 294 295 error = vfs_suspend(mp, 0); 296 if (error) 297 goto out; 298 299 mutex_enter(mp->mnt_updating); 300 301 mp->mnt_flag &= ~MNT_OP_FLAGS; 302 mp->mnt_flag |= flags & MNT_OP_FLAGS; 303 304 /* 305 * Set the mount level flags. 306 */ 307 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 308 if ((flags & MNT_RDONLY)) 309 mp->mnt_iflag |= IMNT_WANTRDONLY; 310 else 311 mp->mnt_iflag |= IMNT_WANTRDWR; 312 } 313 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 314 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 315 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 316 mp->mnt_flag &= ~MNT_RDONLY; 317 318 error = VFS_MOUNT(mp, path, data, data_len); 319 320 if (error && data != NULL) { 321 int error2; 322 323 /* 324 * Update failed; let's try and see if it was an 325 * export request. For compat with 3.0 and earlier. 326 */ 327 error2 = vfs_hooks_reexport(mp, path, data); 328 329 /* 330 * Only update error code if the export request was 331 * understood but some problem occurred while 332 * processing it. 333 */ 334 if (error2 != EJUSTRETURN) 335 error = error2; 336 } 337 338 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 339 mp->mnt_flag |= MNT_RDONLY; 340 if (error) 341 mp->mnt_flag = saved_flags; 342 mp->mnt_flag &= ~MNT_OP_FLAGS; 343 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 344 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 345 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 346 vfs_syncer_add_to_worklist(mp); 347 } else { 348 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 349 vfs_syncer_remove_from_worklist(mp); 350 } 351 mutex_exit(mp->mnt_updating); 352 vfs_resume(mp); 353 354 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 355 (flags & MNT_EXTATTR)) { 356 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 357 NULL, 0, NULL) != 0) { 358 printf("%s: failed to start extattr, error = %d", 359 mp->mnt_stat.f_mntonname, error); 360 mp->mnt_flag &= ~MNT_EXTATTR; 361 } 362 } 363 364 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 365 !(flags & MNT_EXTATTR)) { 366 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 367 NULL, 0, NULL) != 0) { 368 printf("%s: failed to stop extattr, error = %d", 369 mp->mnt_stat.f_mntonname, error); 370 mp->mnt_flag |= MNT_RDONLY; 371 } 372 } 373 out: 374 return (error); 375 } 376 377 static int 378 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 379 struct vfsops **vfsops) 380 { 381 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 382 int error; 383 384 if (type_seg == UIO_USERSPACE) { 385 /* Copy file-system type from userspace. */ 386 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 387 } else { 388 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 389 KASSERT(error == 0); 390 } 391 392 if (error) { 393 /* 394 * Historically, filesystem types were identified by numbers. 395 * If we get an integer for the filesystem type instead of a 396 * string, we check to see if it matches one of the historic 397 * filesystem types. 398 */ 399 u_long fsindex = (u_long)fstype; 400 if (fsindex >= nmountcompatnames || 401 mountcompatnames[fsindex] == NULL) 402 return ENODEV; 403 strlcpy(fstypename, mountcompatnames[fsindex], 404 sizeof(fstypename)); 405 } 406 407 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 408 if (strcmp(fstypename, "ufs") == 0) 409 fstypename[0] = 'f'; 410 411 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 412 return 0; 413 414 /* If we can autoload a vfs module, try again */ 415 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 416 417 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 418 return 0; 419 420 return ENODEV; 421 } 422 423 static int 424 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 425 void *data, size_t *data_len) 426 { 427 struct mount *mp; 428 int error; 429 430 /* If MNT_GETARGS is specified, it should be the only flag. */ 431 if (flags & ~MNT_GETARGS) 432 return EINVAL; 433 434 mp = vp->v_mount; 435 436 /* XXX: probably some notion of "can see" here if we want isolation. */ 437 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 438 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 439 if (error) 440 return error; 441 442 if ((vp->v_vflag & VV_ROOT) == 0) 443 return EINVAL; 444 445 if (vfs_busy(mp)) 446 return EPERM; 447 448 mutex_enter(mp->mnt_updating); 449 mp->mnt_flag &= ~MNT_OP_FLAGS; 450 mp->mnt_flag |= MNT_GETARGS; 451 error = VFS_MOUNT(mp, path, data, data_len); 452 mp->mnt_flag &= ~MNT_OP_FLAGS; 453 mutex_exit(mp->mnt_updating); 454 455 vfs_unbusy(mp); 456 return (error); 457 } 458 459 int 460 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 461 { 462 /* { 463 syscallarg(const char *) type; 464 syscallarg(const char *) path; 465 syscallarg(int) flags; 466 syscallarg(void *) data; 467 syscallarg(size_t) data_len; 468 } */ 469 470 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 471 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 472 SCARG(uap, data_len), retval); 473 } 474 475 int 476 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 477 const char *path, int flags, void *data, enum uio_seg data_seg, 478 size_t data_len, register_t *retval) 479 { 480 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 481 struct vnode *vp; 482 void *data_buf = data; 483 bool vfsopsrele = false; 484 size_t alloc_sz = 0; 485 int error; 486 487 /* 488 * Get vnode to be covered 489 */ 490 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 491 if (error != 0) { 492 vp = NULL; 493 goto done; 494 } 495 496 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 497 vfsops = vp->v_mount->mnt_op; 498 } else { 499 /* 'type' is userspace */ 500 error = mount_get_vfsops(type, type_seg, &vfsops); 501 if (error != 0) 502 goto done; 503 vfsopsrele = true; 504 } 505 506 /* 507 * We allow data to be NULL, even for userspace. Some fs's don't need 508 * it. The others will handle NULL. 509 */ 510 if (data != NULL && data_seg == UIO_USERSPACE) { 511 if (data_len == 0) { 512 /* No length supplied, use default for filesystem */ 513 data_len = vfsops->vfs_min_mount_data; 514 515 /* 516 * Hopefully a longer buffer won't make copyin() fail. 517 * For compatibility with 3.0 and earlier. 518 */ 519 if (flags & MNT_UPDATE 520 && data_len < sizeof (struct mnt_export_args30)) 521 data_len = sizeof (struct mnt_export_args30); 522 } 523 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 524 error = EINVAL; 525 goto done; 526 } 527 alloc_sz = data_len; 528 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 529 530 /* NFS needs the buffer even for mnt_getargs .... */ 531 error = copyin(data, data_buf, data_len); 532 if (error != 0) 533 goto done; 534 } 535 536 if (flags & MNT_GETARGS) { 537 if (data_len == 0) { 538 error = EINVAL; 539 goto done; 540 } 541 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 542 if (error != 0) 543 goto done; 544 if (data_seg == UIO_USERSPACE) 545 error = copyout(data_buf, data, data_len); 546 *retval = data_len; 547 } else if (flags & MNT_UPDATE) { 548 error = mount_update(l, vp, path, flags, data_buf, &data_len); 549 } else { 550 /* Locking is handled internally in mount_domount(). */ 551 KASSERT(vfsopsrele == true); 552 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 553 &data_len); 554 vfsopsrele = false; 555 } 556 if (!error) 557 KNOTE(&fs_klist, VQ_MOUNT); 558 559 done: 560 if (vfsopsrele) 561 vfs_delref(vfsops); 562 if (vp != NULL) { 563 vrele(vp); 564 } 565 if (data_buf != data) 566 kmem_free(data_buf, alloc_sz); 567 return (error); 568 } 569 570 /* 571 * Unmount a file system. 572 * 573 * Note: unmount takes a path to the vnode mounted on as argument, 574 * not special file (as before). 575 */ 576 /* ARGSUSED */ 577 int 578 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 579 { 580 /* { 581 syscallarg(const char *) path; 582 syscallarg(int) flags; 583 } */ 584 struct vnode *vp; 585 struct mount *mp; 586 int error; 587 struct pathbuf *pb; 588 struct nameidata nd; 589 590 error = pathbuf_copyin(SCARG(uap, path), &pb); 591 if (error) { 592 return error; 593 } 594 595 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 596 if ((error = namei(&nd)) != 0) { 597 pathbuf_destroy(pb); 598 return error; 599 } 600 vp = nd.ni_vp; 601 pathbuf_destroy(pb); 602 603 mp = vp->v_mount; 604 vfs_ref(mp); 605 VOP_UNLOCK(vp); 606 607 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 608 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 609 if (error) { 610 vrele(vp); 611 vfs_rele(mp); 612 return (error); 613 } 614 615 /* 616 * Don't allow unmounting the root file system. 617 */ 618 if (mp->mnt_flag & MNT_ROOTFS) { 619 vrele(vp); 620 vfs_rele(mp); 621 return (EINVAL); 622 } 623 624 /* 625 * Must be the root of the filesystem 626 */ 627 if ((vp->v_vflag & VV_ROOT) == 0) { 628 vrele(vp); 629 vfs_rele(mp); 630 return (EINVAL); 631 } 632 633 vrele(vp); 634 error = dounmount(mp, SCARG(uap, flags), l); 635 vfs_rele(mp); 636 if (!error) 637 KNOTE(&fs_klist, VQ_UNMOUNT); 638 return error; 639 } 640 641 /* 642 * Sync each mounted filesystem. 643 */ 644 #ifdef DEBUG 645 int syncprt = 0; 646 struct ctldebug debug0 = { "syncprt", &syncprt }; 647 #endif 648 649 void 650 do_sys_sync(struct lwp *l) 651 { 652 mount_iterator_t *iter; 653 struct mount *mp; 654 int asyncflag; 655 656 mountlist_iterator_init(&iter); 657 while ((mp = mountlist_iterator_next(iter)) != NULL) { 658 mutex_enter(mp->mnt_updating); 659 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 660 asyncflag = mp->mnt_flag & MNT_ASYNC; 661 mp->mnt_flag &= ~MNT_ASYNC; 662 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 663 if (asyncflag) 664 mp->mnt_flag |= MNT_ASYNC; 665 } 666 mutex_exit(mp->mnt_updating); 667 } 668 mountlist_iterator_destroy(iter); 669 #ifdef DEBUG 670 if (syncprt) 671 vfs_bufstats(); 672 #endif /* DEBUG */ 673 } 674 675 static bool 676 sync_vnode_filter(void *cookie, vnode_t *vp) 677 { 678 679 if (vp->v_numoutput > 0) { 680 ++*(int *)cookie; 681 } 682 return false; 683 } 684 685 int 686 vfs_syncwait(void) 687 { 688 int nbusy, nbusy_prev, iter; 689 struct vnode_iterator *vniter; 690 mount_iterator_t *mpiter; 691 struct mount *mp; 692 693 for (nbusy_prev = 0, iter = 0; iter < 20;) { 694 nbusy = 0; 695 mountlist_iterator_init(&mpiter); 696 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 697 vnode_t *vp __diagused; 698 vfs_vnode_iterator_init(mp, &vniter); 699 vp = vfs_vnode_iterator_next(vniter, 700 sync_vnode_filter, &nbusy); 701 KASSERT(vp == NULL); 702 vfs_vnode_iterator_destroy(vniter); 703 } 704 mountlist_iterator_destroy(mpiter); 705 706 if (nbusy == 0) 707 break; 708 if (nbusy_prev == 0) 709 nbusy_prev = nbusy; 710 printf("%d ", nbusy); 711 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 712 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 713 iter++; 714 else 715 nbusy_prev = nbusy; 716 } 717 718 if (nbusy) { 719 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 720 printf("giving up\nPrinting vnodes for busy buffers\n"); 721 mountlist_iterator_init(&mpiter); 722 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 723 vnode_t *vp; 724 vfs_vnode_iterator_init(mp, &vniter); 725 vp = vfs_vnode_iterator_next(vniter, 726 NULL, NULL); 727 mutex_enter(vp->v_interlock); 728 if (vp->v_numoutput > 0) 729 vprint(NULL, vp); 730 mutex_exit(vp->v_interlock); 731 vrele(vp); 732 vfs_vnode_iterator_destroy(vniter); 733 } 734 mountlist_iterator_destroy(mpiter); 735 #endif 736 } 737 738 return nbusy; 739 } 740 741 /* ARGSUSED */ 742 int 743 sys_sync(struct lwp *l, const void *v, register_t *retval) 744 { 745 do_sys_sync(l); 746 return (0); 747 } 748 749 750 /* 751 * Access or change filesystem quotas. 752 * 753 * (this is really 14 different calls bundled into one) 754 */ 755 756 static int 757 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 758 { 759 struct quotastat info_k; 760 int error; 761 762 /* ensure any padding bytes are cleared */ 763 memset(&info_k, 0, sizeof(info_k)); 764 765 error = vfs_quotactl_stat(mp, &info_k); 766 if (error) { 767 return error; 768 } 769 770 return copyout(&info_k, info_u, sizeof(info_k)); 771 } 772 773 static int 774 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 775 struct quotaidtypestat *info_u) 776 { 777 struct quotaidtypestat info_k; 778 int error; 779 780 /* ensure any padding bytes are cleared */ 781 memset(&info_k, 0, sizeof(info_k)); 782 783 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 784 if (error) { 785 return error; 786 } 787 788 return copyout(&info_k, info_u, sizeof(info_k)); 789 } 790 791 static int 792 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 793 struct quotaobjtypestat *info_u) 794 { 795 struct quotaobjtypestat info_k; 796 int error; 797 798 /* ensure any padding bytes are cleared */ 799 memset(&info_k, 0, sizeof(info_k)); 800 801 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 802 if (error) { 803 return error; 804 } 805 806 return copyout(&info_k, info_u, sizeof(info_k)); 807 } 808 809 static int 810 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 811 struct quotaval *val_u) 812 { 813 struct quotakey key_k; 814 struct quotaval val_k; 815 int error; 816 817 /* ensure any padding bytes are cleared */ 818 memset(&val_k, 0, sizeof(val_k)); 819 820 error = copyin(key_u, &key_k, sizeof(key_k)); 821 if (error) { 822 return error; 823 } 824 825 error = vfs_quotactl_get(mp, &key_k, &val_k); 826 if (error) { 827 return error; 828 } 829 830 return copyout(&val_k, val_u, sizeof(val_k)); 831 } 832 833 static int 834 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 835 const struct quotaval *val_u) 836 { 837 struct quotakey key_k; 838 struct quotaval val_k; 839 int error; 840 841 error = copyin(key_u, &key_k, sizeof(key_k)); 842 if (error) { 843 return error; 844 } 845 846 error = copyin(val_u, &val_k, sizeof(val_k)); 847 if (error) { 848 return error; 849 } 850 851 return vfs_quotactl_put(mp, &key_k, &val_k); 852 } 853 854 static int 855 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 856 { 857 struct quotakey key_k; 858 int error; 859 860 error = copyin(key_u, &key_k, sizeof(key_k)); 861 if (error) { 862 return error; 863 } 864 865 return vfs_quotactl_del(mp, &key_k); 866 } 867 868 static int 869 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 870 { 871 struct quotakcursor cursor_k; 872 int error; 873 874 /* ensure any padding bytes are cleared */ 875 memset(&cursor_k, 0, sizeof(cursor_k)); 876 877 error = vfs_quotactl_cursoropen(mp, &cursor_k); 878 if (error) { 879 return error; 880 } 881 882 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 883 } 884 885 static int 886 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 887 { 888 struct quotakcursor cursor_k; 889 int error; 890 891 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 892 if (error) { 893 return error; 894 } 895 896 return vfs_quotactl_cursorclose(mp, &cursor_k); 897 } 898 899 static int 900 do_sys_quotactl_cursorskipidtype(struct mount *mp, 901 struct quotakcursor *cursor_u, int idtype) 902 { 903 struct quotakcursor cursor_k; 904 int error; 905 906 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 907 if (error) { 908 return error; 909 } 910 911 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 912 if (error) { 913 return error; 914 } 915 916 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 917 } 918 919 static int 920 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 921 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 922 unsigned *ret_u) 923 { 924 #define CGET_STACK_MAX 8 925 struct quotakcursor cursor_k; 926 struct quotakey stackkeys[CGET_STACK_MAX]; 927 struct quotaval stackvals[CGET_STACK_MAX]; 928 struct quotakey *keys_k; 929 struct quotaval *vals_k; 930 unsigned ret_k; 931 int error; 932 933 if (maxnum > 128) { 934 maxnum = 128; 935 } 936 937 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 938 if (error) { 939 return error; 940 } 941 942 if (maxnum <= CGET_STACK_MAX) { 943 keys_k = stackkeys; 944 vals_k = stackvals; 945 /* ensure any padding bytes are cleared */ 946 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 947 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 948 } else { 949 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 950 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 951 } 952 953 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 954 &ret_k); 955 if (error) { 956 goto fail; 957 } 958 959 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 960 if (error) { 961 goto fail; 962 } 963 964 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 965 if (error) { 966 goto fail; 967 } 968 969 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 970 if (error) { 971 goto fail; 972 } 973 974 /* do last to maximize the chance of being able to recover a failure */ 975 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 976 977 fail: 978 if (keys_k != stackkeys) { 979 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 980 } 981 if (vals_k != stackvals) { 982 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 983 } 984 return error; 985 } 986 987 static int 988 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 989 int *ret_u) 990 { 991 struct quotakcursor cursor_k; 992 int ret_k; 993 int error; 994 995 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 996 if (error) { 997 return error; 998 } 999 1000 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1001 if (error) { 1002 return error; 1003 } 1004 1005 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1006 if (error) { 1007 return error; 1008 } 1009 1010 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1011 } 1012 1013 static int 1014 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1015 { 1016 struct quotakcursor cursor_k; 1017 int error; 1018 1019 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1020 if (error) { 1021 return error; 1022 } 1023 1024 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1025 if (error) { 1026 return error; 1027 } 1028 1029 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1030 } 1031 1032 static int 1033 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1034 { 1035 char *path_k; 1036 int error; 1037 1038 /* XXX this should probably be a struct pathbuf */ 1039 path_k = PNBUF_GET(); 1040 error = copyin(path_u, path_k, PATH_MAX); 1041 if (error) { 1042 PNBUF_PUT(path_k); 1043 return error; 1044 } 1045 1046 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1047 1048 PNBUF_PUT(path_k); 1049 return error; 1050 } 1051 1052 static int 1053 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1054 { 1055 return vfs_quotactl_quotaoff(mp, idtype); 1056 } 1057 1058 int 1059 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1060 { 1061 struct mount *mp; 1062 struct vnode *vp; 1063 int error; 1064 1065 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1066 if (error != 0) 1067 return (error); 1068 mp = vp->v_mount; 1069 1070 switch (args->qc_op) { 1071 case QUOTACTL_STAT: 1072 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1073 break; 1074 case QUOTACTL_IDTYPESTAT: 1075 error = do_sys_quotactl_idtypestat(mp, 1076 args->u.idtypestat.qc_idtype, 1077 args->u.idtypestat.qc_info); 1078 break; 1079 case QUOTACTL_OBJTYPESTAT: 1080 error = do_sys_quotactl_objtypestat(mp, 1081 args->u.objtypestat.qc_objtype, 1082 args->u.objtypestat.qc_info); 1083 break; 1084 case QUOTACTL_GET: 1085 error = do_sys_quotactl_get(mp, 1086 args->u.get.qc_key, 1087 args->u.get.qc_val); 1088 break; 1089 case QUOTACTL_PUT: 1090 error = do_sys_quotactl_put(mp, 1091 args->u.put.qc_key, 1092 args->u.put.qc_val); 1093 break; 1094 case QUOTACTL_DEL: 1095 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1096 break; 1097 case QUOTACTL_CURSOROPEN: 1098 error = do_sys_quotactl_cursoropen(mp, 1099 args->u.cursoropen.qc_cursor); 1100 break; 1101 case QUOTACTL_CURSORCLOSE: 1102 error = do_sys_quotactl_cursorclose(mp, 1103 args->u.cursorclose.qc_cursor); 1104 break; 1105 case QUOTACTL_CURSORSKIPIDTYPE: 1106 error = do_sys_quotactl_cursorskipidtype(mp, 1107 args->u.cursorskipidtype.qc_cursor, 1108 args->u.cursorskipidtype.qc_idtype); 1109 break; 1110 case QUOTACTL_CURSORGET: 1111 error = do_sys_quotactl_cursorget(mp, 1112 args->u.cursorget.qc_cursor, 1113 args->u.cursorget.qc_keys, 1114 args->u.cursorget.qc_vals, 1115 args->u.cursorget.qc_maxnum, 1116 args->u.cursorget.qc_ret); 1117 break; 1118 case QUOTACTL_CURSORATEND: 1119 error = do_sys_quotactl_cursoratend(mp, 1120 args->u.cursoratend.qc_cursor, 1121 args->u.cursoratend.qc_ret); 1122 break; 1123 case QUOTACTL_CURSORREWIND: 1124 error = do_sys_quotactl_cursorrewind(mp, 1125 args->u.cursorrewind.qc_cursor); 1126 break; 1127 case QUOTACTL_QUOTAON: 1128 error = do_sys_quotactl_quotaon(mp, 1129 args->u.quotaon.qc_idtype, 1130 args->u.quotaon.qc_quotafile); 1131 break; 1132 case QUOTACTL_QUOTAOFF: 1133 error = do_sys_quotactl_quotaoff(mp, 1134 args->u.quotaoff.qc_idtype); 1135 break; 1136 default: 1137 error = EINVAL; 1138 break; 1139 } 1140 1141 vrele(vp); 1142 return error; 1143 } 1144 1145 /* ARGSUSED */ 1146 int 1147 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1148 register_t *retval) 1149 { 1150 /* { 1151 syscallarg(const char *) path; 1152 syscallarg(struct quotactl_args *) args; 1153 } */ 1154 struct quotactl_args args; 1155 int error; 1156 1157 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1158 if (error) { 1159 return error; 1160 } 1161 1162 return do_sys_quotactl(SCARG(uap, path), &args); 1163 } 1164 1165 int 1166 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1167 int root) 1168 { 1169 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1170 bool chrooted; 1171 int error = 0; 1172 1173 KASSERT(l == curlwp); 1174 1175 /* 1176 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1177 * since it would imply chroots can be escaped. Just make sure this 1178 * routine is self-consistent. 1179 */ 1180 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1181 1182 /* 1183 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1184 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1185 * overrides MNT_NOWAIT. 1186 */ 1187 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1188 (flags != MNT_WAIT && flags != 0)) { 1189 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1190 } else { 1191 /* Get the filesystem stats now */ 1192 memset(sp, 0, sizeof(*sp)); 1193 if ((error = VFS_STATVFS(mp, sp)) != 0) 1194 return error; 1195 if (!chrooted) 1196 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1197 } 1198 1199 if (chrooted) { 1200 size_t len; 1201 char *bp; 1202 char c; 1203 char *path = PNBUF_GET(); 1204 1205 bp = path + MAXPATHLEN; 1206 *--bp = '\0'; 1207 rw_enter(&cwdi->cwdi_lock, RW_READER); 1208 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1209 MAXPATHLEN / 2, 0, l); 1210 rw_exit(&cwdi->cwdi_lock); 1211 if (error) { 1212 PNBUF_PUT(path); 1213 return error; 1214 } 1215 len = strlen(bp); 1216 if (len != 1) { 1217 /* 1218 * for mount points that are below our root, we can see 1219 * them, so we fix up the pathname and return them. The 1220 * rest we cannot see, so we don't allow viewing the 1221 * data. 1222 */ 1223 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1224 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1225 (void)strlcpy(sp->f_mntonname, 1226 c == '\0' ? "/" : &sp->f_mntonname[len], 1227 sizeof(sp->f_mntonname)); 1228 } else { 1229 if (root) 1230 (void)strlcpy(sp->f_mntonname, "/", 1231 sizeof(sp->f_mntonname)); 1232 else 1233 error = EPERM; 1234 } 1235 } 1236 PNBUF_PUT(path); 1237 } 1238 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1239 return error; 1240 } 1241 1242 /* 1243 * Get filesystem statistics by path. 1244 */ 1245 int 1246 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1247 { 1248 struct mount *mp; 1249 int error; 1250 struct vnode *vp; 1251 1252 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1253 if (error != 0) 1254 return error; 1255 mp = vp->v_mount; 1256 error = dostatvfs(mp, sb, l, flags, 1); 1257 vrele(vp); 1258 return error; 1259 } 1260 1261 /* ARGSUSED */ 1262 int 1263 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1264 { 1265 /* { 1266 syscallarg(const char *) path; 1267 syscallarg(struct statvfs *) buf; 1268 syscallarg(int) flags; 1269 } */ 1270 struct statvfs *sb; 1271 int error; 1272 1273 sb = STATVFSBUF_GET(); 1274 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1275 if (error == 0) 1276 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1277 STATVFSBUF_PUT(sb); 1278 return error; 1279 } 1280 1281 /* 1282 * Get filesystem statistics by fd. 1283 */ 1284 int 1285 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1286 { 1287 file_t *fp; 1288 struct mount *mp; 1289 int error; 1290 1291 /* fd_getvnode() will use the descriptor for us */ 1292 if ((error = fd_getvnode(fd, &fp)) != 0) 1293 return (error); 1294 mp = fp->f_vnode->v_mount; 1295 error = dostatvfs(mp, sb, curlwp, flags, 1); 1296 fd_putfile(fd); 1297 return error; 1298 } 1299 1300 /* ARGSUSED */ 1301 int 1302 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1303 { 1304 /* { 1305 syscallarg(int) fd; 1306 syscallarg(struct statvfs *) buf; 1307 syscallarg(int) flags; 1308 } */ 1309 struct statvfs *sb; 1310 int error; 1311 1312 sb = STATVFSBUF_GET(); 1313 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1314 if (error == 0) 1315 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1316 STATVFSBUF_PUT(sb); 1317 return error; 1318 } 1319 1320 1321 /* 1322 * Get statistics on all filesystems. 1323 */ 1324 int 1325 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1326 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1327 register_t *retval) 1328 { 1329 int root = 0; 1330 mount_iterator_t *iter; 1331 struct proc *p = l->l_proc; 1332 struct mount *mp; 1333 struct statvfs *sb; 1334 size_t count, maxcount; 1335 int error = 0; 1336 1337 sb = STATVFSBUF_GET(); 1338 maxcount = bufsize / entry_sz; 1339 count = 0; 1340 mountlist_iterator_init(&iter); 1341 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1342 if (sfsp && count < maxcount) { 1343 error = dostatvfs(mp, sb, l, flags, 0); 1344 if (error) { 1345 error = 0; 1346 continue; 1347 } 1348 error = copyfn(sb, sfsp, entry_sz); 1349 if (error) 1350 goto out; 1351 sfsp = (char *)sfsp + entry_sz; 1352 root |= strcmp(sb->f_mntonname, "/") == 0; 1353 } 1354 count++; 1355 } 1356 1357 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1358 /* 1359 * fake a root entry 1360 */ 1361 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1362 sb, l, flags, 1); 1363 if (error != 0) 1364 goto out; 1365 if (sfsp) { 1366 error = copyfn(sb, sfsp, entry_sz); 1367 if (error != 0) 1368 goto out; 1369 } 1370 count++; 1371 } 1372 if (sfsp && count > maxcount) 1373 *retval = maxcount; 1374 else 1375 *retval = count; 1376 out: 1377 mountlist_iterator_destroy(iter); 1378 STATVFSBUF_PUT(sb); 1379 return error; 1380 } 1381 1382 int 1383 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1384 register_t *retval) 1385 { 1386 /* { 1387 syscallarg(struct statvfs *) buf; 1388 syscallarg(size_t) bufsize; 1389 syscallarg(int) flags; 1390 } */ 1391 1392 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1393 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1394 } 1395 1396 /* 1397 * Change current working directory to a given file descriptor. 1398 */ 1399 /* ARGSUSED */ 1400 int 1401 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1402 { 1403 /* { 1404 syscallarg(int) fd; 1405 } */ 1406 struct proc *p = l->l_proc; 1407 struct cwdinfo *cwdi; 1408 struct vnode *vp, *tdp; 1409 struct mount *mp; 1410 file_t *fp; 1411 int error, fd; 1412 1413 /* fd_getvnode() will use the descriptor for us */ 1414 fd = SCARG(uap, fd); 1415 if ((error = fd_getvnode(fd, &fp)) != 0) 1416 return (error); 1417 vp = fp->f_vnode; 1418 1419 vref(vp); 1420 vn_lock(vp, LK_SHARED | LK_RETRY); 1421 if (vp->v_type != VDIR) 1422 error = ENOTDIR; 1423 else 1424 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1425 if (error) { 1426 vput(vp); 1427 goto out; 1428 } 1429 while ((mp = vp->v_mountedhere) != NULL) { 1430 error = vfs_busy(mp); 1431 vput(vp); 1432 if (error != 0) 1433 goto out; 1434 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1435 vfs_unbusy(mp); 1436 if (error) 1437 goto out; 1438 vp = tdp; 1439 } 1440 VOP_UNLOCK(vp); 1441 1442 /* 1443 * Disallow changing to a directory not under the process's 1444 * current root directory (if there is one). 1445 */ 1446 cwdi = p->p_cwdi; 1447 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1448 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1449 vrele(vp); 1450 error = EPERM; /* operation not permitted */ 1451 } else { 1452 vrele(cwdi->cwdi_cdir); 1453 cwdi->cwdi_cdir = vp; 1454 } 1455 rw_exit(&cwdi->cwdi_lock); 1456 1457 out: 1458 fd_putfile(fd); 1459 return (error); 1460 } 1461 1462 /* 1463 * Change this process's notion of the root directory to a given file 1464 * descriptor. 1465 */ 1466 int 1467 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1468 { 1469 struct vnode *vp; 1470 file_t *fp; 1471 int error, fd = SCARG(uap, fd); 1472 1473 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1474 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1475 return error; 1476 /* fd_getvnode() will use the descriptor for us */ 1477 if ((error = fd_getvnode(fd, &fp)) != 0) 1478 return error; 1479 vp = fp->f_vnode; 1480 vn_lock(vp, LK_SHARED | LK_RETRY); 1481 if (vp->v_type != VDIR) 1482 error = ENOTDIR; 1483 else 1484 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1485 VOP_UNLOCK(vp); 1486 if (error) 1487 goto out; 1488 vref(vp); 1489 change_root(vp); 1490 1491 out: 1492 fd_putfile(fd); 1493 return (error); 1494 } 1495 1496 /* 1497 * Change current working directory (``.''). 1498 */ 1499 /* ARGSUSED */ 1500 int 1501 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1502 { 1503 /* { 1504 syscallarg(const char *) path; 1505 } */ 1506 struct proc *p = l->l_proc; 1507 struct cwdinfo *cwdi; 1508 int error; 1509 struct vnode *vp; 1510 1511 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1512 &vp, l)) != 0) 1513 return (error); 1514 cwdi = p->p_cwdi; 1515 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1516 vrele(cwdi->cwdi_cdir); 1517 cwdi->cwdi_cdir = vp; 1518 rw_exit(&cwdi->cwdi_lock); 1519 return (0); 1520 } 1521 1522 /* 1523 * Change notion of root (``/'') directory. 1524 */ 1525 /* ARGSUSED */ 1526 int 1527 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1528 { 1529 /* { 1530 syscallarg(const char *) path; 1531 } */ 1532 int error; 1533 struct vnode *vp; 1534 1535 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1536 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1537 return (error); 1538 1539 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1540 if (error == 0) 1541 change_root(vp); 1542 return error; 1543 } 1544 1545 /* 1546 * Common routine for chroot and fchroot. 1547 * NB: callers need to properly authorize the change root operation. 1548 */ 1549 void 1550 change_root(struct vnode *vp) 1551 { 1552 kauth_cred_t ncred; 1553 struct lwp *l = curlwp; 1554 struct proc *p = l->l_proc; 1555 struct cwdinfo *cwdi = p->p_cwdi; 1556 1557 ncred = kauth_cred_alloc(); 1558 1559 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1560 if (cwdi->cwdi_rdir != NULL) 1561 vrele(cwdi->cwdi_rdir); 1562 cwdi->cwdi_rdir = vp; 1563 1564 /* 1565 * Prevent escaping from chroot by putting the root under 1566 * the working directory. Silently chdir to / if we aren't 1567 * already there. 1568 */ 1569 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1570 /* 1571 * XXX would be more failsafe to change directory to a 1572 * deadfs node here instead 1573 */ 1574 vrele(cwdi->cwdi_cdir); 1575 vref(vp); 1576 cwdi->cwdi_cdir = vp; 1577 } 1578 rw_exit(&cwdi->cwdi_lock); 1579 1580 /* Get a write lock on the process credential. */ 1581 proc_crmod_enter(); 1582 1583 kauth_cred_clone(p->p_cred, ncred); 1584 kauth_proc_chroot(ncred, p->p_cwdi); 1585 1586 /* Broadcast our credentials to the process and other LWPs. */ 1587 proc_crmod_leave(ncred, p->p_cred, true); 1588 } 1589 1590 /* 1591 * Common routine for chroot and chdir. 1592 * XXX "where" should be enum uio_seg 1593 */ 1594 int 1595 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1596 { 1597 struct pathbuf *pb; 1598 struct nameidata nd; 1599 int error; 1600 1601 error = pathbuf_maybe_copyin(path, where, &pb); 1602 if (error) { 1603 return error; 1604 } 1605 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1606 if ((error = namei(&nd)) != 0) { 1607 pathbuf_destroy(pb); 1608 return error; 1609 } 1610 *vpp = nd.ni_vp; 1611 pathbuf_destroy(pb); 1612 1613 if ((*vpp)->v_type != VDIR) 1614 error = ENOTDIR; 1615 else 1616 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1617 1618 if (error) 1619 vput(*vpp); 1620 else 1621 VOP_UNLOCK(*vpp); 1622 return (error); 1623 } 1624 1625 /* 1626 * Internals of sys_open - path has already been converted into a pathbuf 1627 * (so we can easily reuse this function from other parts of the kernel, 1628 * like posix_spawn post-processing). 1629 */ 1630 int 1631 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1632 int open_mode, int *fd) 1633 { 1634 struct proc *p = l->l_proc; 1635 struct cwdinfo *cwdi = p->p_cwdi; 1636 file_t *fp; 1637 struct vnode *vp; 1638 int flags, cmode; 1639 int indx, error; 1640 struct nameidata nd; 1641 1642 if (open_flags & O_SEARCH) { 1643 open_flags &= ~(int)O_SEARCH; 1644 } 1645 1646 /* 1647 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1648 * may be specified. 1649 */ 1650 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1651 return EINVAL; 1652 1653 flags = FFLAGS(open_flags); 1654 if ((flags & (FREAD | FWRITE)) == 0) 1655 return EINVAL; 1656 1657 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1658 return error; 1659 } 1660 1661 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1662 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1663 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1664 if (dvp != NULL) 1665 NDAT(&nd, dvp); 1666 1667 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1668 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1669 fd_abort(p, fp, indx); 1670 if ((error == EDUPFD || error == EMOVEFD) && 1671 l->l_dupfd >= 0 && /* XXX from fdopen */ 1672 (error = 1673 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1674 *fd = indx; 1675 return 0; 1676 } 1677 if (error == ERESTART) 1678 error = EINTR; 1679 return error; 1680 } 1681 1682 l->l_dupfd = 0; 1683 vp = nd.ni_vp; 1684 1685 if ((error = open_setfp(l, fp, vp, indx, flags))) 1686 return error; 1687 1688 VOP_UNLOCK(vp); 1689 *fd = indx; 1690 fd_affix(p, fp, indx); 1691 return 0; 1692 } 1693 1694 int 1695 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1696 { 1697 struct pathbuf *pb; 1698 int error, oflags; 1699 1700 oflags = FFLAGS(open_flags); 1701 if ((oflags & (FREAD | FWRITE)) == 0) 1702 return EINVAL; 1703 1704 pb = pathbuf_create(path); 1705 if (pb == NULL) 1706 return ENOMEM; 1707 1708 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1709 pathbuf_destroy(pb); 1710 1711 return error; 1712 } 1713 1714 static int 1715 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1716 int mode, int *fd) 1717 { 1718 file_t *dfp = NULL; 1719 struct vnode *dvp = NULL; 1720 struct pathbuf *pb; 1721 const char *pathstring = NULL; 1722 int error; 1723 1724 if (path == NULL) { 1725 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1726 if (error == ENOSYS) 1727 goto no_compat; 1728 if (error) 1729 return error; 1730 } else { 1731 no_compat: 1732 error = pathbuf_copyin(path, &pb); 1733 if (error) 1734 return error; 1735 } 1736 1737 pathstring = pathbuf_stringcopy_get(pb); 1738 1739 /* 1740 * fdat is ignored if: 1741 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1742 * 2) if path is absolute, then fdat is useless. 1743 */ 1744 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1745 /* fd_getvnode() will use the descriptor for us */ 1746 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1747 goto out; 1748 1749 dvp = dfp->f_vnode; 1750 } 1751 1752 error = do_open(l, dvp, pb, flags, mode, fd); 1753 1754 if (dfp != NULL) 1755 fd_putfile(fdat); 1756 out: 1757 pathbuf_stringcopy_put(pb, pathstring); 1758 pathbuf_destroy(pb); 1759 return error; 1760 } 1761 1762 int 1763 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1764 { 1765 /* { 1766 syscallarg(const char *) path; 1767 syscallarg(int) flags; 1768 syscallarg(int) mode; 1769 } */ 1770 int error; 1771 int fd; 1772 1773 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1774 SCARG(uap, flags), SCARG(uap, mode), &fd); 1775 1776 if (error == 0) 1777 *retval = fd; 1778 1779 return error; 1780 } 1781 1782 int 1783 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1784 { 1785 /* { 1786 syscallarg(int) fd; 1787 syscallarg(const char *) path; 1788 syscallarg(int) oflags; 1789 syscallarg(int) mode; 1790 } */ 1791 int error; 1792 int fd; 1793 1794 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1795 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1796 1797 if (error == 0) 1798 *retval = fd; 1799 1800 return error; 1801 } 1802 1803 static void 1804 vfs__fhfree(fhandle_t *fhp) 1805 { 1806 size_t fhsize; 1807 1808 fhsize = FHANDLE_SIZE(fhp); 1809 kmem_free(fhp, fhsize); 1810 } 1811 1812 /* 1813 * vfs_composefh: compose a filehandle. 1814 */ 1815 1816 int 1817 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1818 { 1819 struct mount *mp; 1820 struct fid *fidp; 1821 int error; 1822 size_t needfhsize; 1823 size_t fidsize; 1824 1825 mp = vp->v_mount; 1826 fidp = NULL; 1827 if (*fh_size < FHANDLE_SIZE_MIN) { 1828 fidsize = 0; 1829 } else { 1830 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1831 if (fhp != NULL) { 1832 memset(fhp, 0, *fh_size); 1833 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1834 fidp = &fhp->fh_fid; 1835 } 1836 } 1837 error = VFS_VPTOFH(vp, fidp, &fidsize); 1838 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1839 if (error == 0 && *fh_size < needfhsize) { 1840 error = E2BIG; 1841 } 1842 *fh_size = needfhsize; 1843 return error; 1844 } 1845 1846 int 1847 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1848 { 1849 struct mount *mp; 1850 fhandle_t *fhp; 1851 size_t fhsize; 1852 size_t fidsize; 1853 int error; 1854 1855 mp = vp->v_mount; 1856 fidsize = 0; 1857 error = VFS_VPTOFH(vp, NULL, &fidsize); 1858 KASSERT(error != 0); 1859 if (error != E2BIG) { 1860 goto out; 1861 } 1862 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1863 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1864 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1865 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1866 if (error == 0) { 1867 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1868 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1869 *fhpp = fhp; 1870 } else { 1871 kmem_free(fhp, fhsize); 1872 } 1873 out: 1874 return error; 1875 } 1876 1877 void 1878 vfs_composefh_free(fhandle_t *fhp) 1879 { 1880 1881 vfs__fhfree(fhp); 1882 } 1883 1884 /* 1885 * vfs_fhtovp: lookup a vnode by a filehandle. 1886 */ 1887 1888 int 1889 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1890 { 1891 struct mount *mp; 1892 int error; 1893 1894 *vpp = NULL; 1895 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1896 if (mp == NULL) { 1897 error = ESTALE; 1898 goto out; 1899 } 1900 if (mp->mnt_op->vfs_fhtovp == NULL) { 1901 error = EOPNOTSUPP; 1902 goto out; 1903 } 1904 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 1905 out: 1906 return error; 1907 } 1908 1909 /* 1910 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1911 * the needed size. 1912 */ 1913 1914 int 1915 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1916 { 1917 fhandle_t *fhp; 1918 int error; 1919 1920 if (fhsize > FHANDLE_SIZE_MAX) { 1921 return EINVAL; 1922 } 1923 if (fhsize < FHANDLE_SIZE_MIN) { 1924 return EINVAL; 1925 } 1926 again: 1927 fhp = kmem_alloc(fhsize, KM_SLEEP); 1928 error = copyin(ufhp, fhp, fhsize); 1929 if (error == 0) { 1930 /* XXX this check shouldn't be here */ 1931 if (FHANDLE_SIZE(fhp) == fhsize) { 1932 *fhpp = fhp; 1933 return 0; 1934 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1935 /* 1936 * a kludge for nfsv2 padded handles. 1937 */ 1938 size_t sz; 1939 1940 sz = FHANDLE_SIZE(fhp); 1941 kmem_free(fhp, fhsize); 1942 fhsize = sz; 1943 goto again; 1944 } else { 1945 /* 1946 * userland told us wrong size. 1947 */ 1948 error = EINVAL; 1949 } 1950 } 1951 kmem_free(fhp, fhsize); 1952 return error; 1953 } 1954 1955 void 1956 vfs_copyinfh_free(fhandle_t *fhp) 1957 { 1958 1959 vfs__fhfree(fhp); 1960 } 1961 1962 /* 1963 * Get file handle system call 1964 */ 1965 int 1966 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1967 { 1968 /* { 1969 syscallarg(char *) fname; 1970 syscallarg(fhandle_t *) fhp; 1971 syscallarg(size_t *) fh_size; 1972 } */ 1973 struct vnode *vp; 1974 fhandle_t *fh; 1975 int error; 1976 struct pathbuf *pb; 1977 struct nameidata nd; 1978 size_t sz; 1979 size_t usz; 1980 1981 /* 1982 * Must be super user 1983 */ 1984 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1985 0, NULL, NULL, NULL); 1986 if (error) 1987 return (error); 1988 1989 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1990 if (error) { 1991 return error; 1992 } 1993 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1994 error = namei(&nd); 1995 if (error) { 1996 pathbuf_destroy(pb); 1997 return error; 1998 } 1999 vp = nd.ni_vp; 2000 pathbuf_destroy(pb); 2001 2002 error = vfs_composefh_alloc(vp, &fh); 2003 vput(vp); 2004 if (error != 0) { 2005 return error; 2006 } 2007 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2008 if (error != 0) { 2009 goto out; 2010 } 2011 sz = FHANDLE_SIZE(fh); 2012 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2013 if (error != 0) { 2014 goto out; 2015 } 2016 if (usz >= sz) { 2017 error = copyout(fh, SCARG(uap, fhp), sz); 2018 } else { 2019 error = E2BIG; 2020 } 2021 out: 2022 vfs_composefh_free(fh); 2023 return (error); 2024 } 2025 2026 /* 2027 * Open a file given a file handle. 2028 * 2029 * Check permissions, allocate an open file structure, 2030 * and call the device open routine if any. 2031 */ 2032 2033 int 2034 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2035 register_t *retval) 2036 { 2037 file_t *fp; 2038 struct vnode *vp = NULL; 2039 kauth_cred_t cred = l->l_cred; 2040 file_t *nfp; 2041 int indx, error; 2042 struct vattr va; 2043 fhandle_t *fh; 2044 int flags; 2045 proc_t *p; 2046 2047 p = curproc; 2048 2049 /* 2050 * Must be super user 2051 */ 2052 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2053 0, NULL, NULL, NULL))) 2054 return (error); 2055 2056 if (oflags & O_SEARCH) { 2057 oflags &= ~(int)O_SEARCH; 2058 } 2059 2060 flags = FFLAGS(oflags); 2061 if ((flags & (FREAD | FWRITE)) == 0) 2062 return (EINVAL); 2063 if ((flags & O_CREAT)) 2064 return (EINVAL); 2065 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2066 return (error); 2067 fp = nfp; 2068 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2069 if (error != 0) { 2070 goto bad; 2071 } 2072 error = vfs_fhtovp(fh, &vp); 2073 vfs_copyinfh_free(fh); 2074 if (error != 0) { 2075 goto bad; 2076 } 2077 2078 /* Now do an effective vn_open */ 2079 2080 if (vp->v_type == VSOCK) { 2081 error = EOPNOTSUPP; 2082 goto bad; 2083 } 2084 error = vn_openchk(vp, cred, flags); 2085 if (error != 0) 2086 goto bad; 2087 if (flags & O_TRUNC) { 2088 VOP_UNLOCK(vp); /* XXX */ 2089 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2090 vattr_null(&va); 2091 va.va_size = 0; 2092 error = VOP_SETATTR(vp, &va, cred); 2093 if (error) 2094 goto bad; 2095 } 2096 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2097 goto bad; 2098 if (flags & FWRITE) { 2099 mutex_enter(vp->v_interlock); 2100 vp->v_writecount++; 2101 mutex_exit(vp->v_interlock); 2102 } 2103 2104 /* done with modified vn_open, now finish what sys_open does. */ 2105 if ((error = open_setfp(l, fp, vp, indx, flags))) 2106 return error; 2107 2108 VOP_UNLOCK(vp); 2109 *retval = indx; 2110 fd_affix(p, fp, indx); 2111 return (0); 2112 2113 bad: 2114 fd_abort(p, fp, indx); 2115 if (vp != NULL) 2116 vput(vp); 2117 return (error); 2118 } 2119 2120 int 2121 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2122 { 2123 /* { 2124 syscallarg(const void *) fhp; 2125 syscallarg(size_t) fh_size; 2126 syscallarg(int) flags; 2127 } */ 2128 2129 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2130 SCARG(uap, flags), retval); 2131 } 2132 2133 int 2134 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2135 { 2136 int error; 2137 fhandle_t *fh; 2138 struct vnode *vp; 2139 2140 /* 2141 * Must be super user 2142 */ 2143 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2144 0, NULL, NULL, NULL))) 2145 return (error); 2146 2147 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2148 if (error != 0) 2149 return error; 2150 2151 error = vfs_fhtovp(fh, &vp); 2152 vfs_copyinfh_free(fh); 2153 if (error != 0) 2154 return error; 2155 2156 error = vn_stat(vp, sb); 2157 vput(vp); 2158 return error; 2159 } 2160 2161 2162 /* ARGSUSED */ 2163 int 2164 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2165 { 2166 /* { 2167 syscallarg(const void *) fhp; 2168 syscallarg(size_t) fh_size; 2169 syscallarg(struct stat *) sb; 2170 } */ 2171 struct stat sb; 2172 int error; 2173 2174 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2175 if (error) 2176 return error; 2177 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2178 } 2179 2180 int 2181 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2182 int flags) 2183 { 2184 fhandle_t *fh; 2185 struct mount *mp; 2186 struct vnode *vp; 2187 int error; 2188 2189 /* 2190 * Must be super user 2191 */ 2192 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2193 0, NULL, NULL, NULL))) 2194 return error; 2195 2196 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2197 if (error != 0) 2198 return error; 2199 2200 error = vfs_fhtovp(fh, &vp); 2201 vfs_copyinfh_free(fh); 2202 if (error != 0) 2203 return error; 2204 2205 mp = vp->v_mount; 2206 error = dostatvfs(mp, sb, l, flags, 1); 2207 vput(vp); 2208 return error; 2209 } 2210 2211 /* ARGSUSED */ 2212 int 2213 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2214 { 2215 /* { 2216 syscallarg(const void *) fhp; 2217 syscallarg(size_t) fh_size; 2218 syscallarg(struct statvfs *) buf; 2219 syscallarg(int) flags; 2220 } */ 2221 struct statvfs *sb = STATVFSBUF_GET(); 2222 int error; 2223 2224 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2225 SCARG(uap, flags)); 2226 if (error == 0) 2227 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2228 STATVFSBUF_PUT(sb); 2229 return error; 2230 } 2231 2232 int 2233 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2234 dev_t dev) 2235 { 2236 2237 /* 2238 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2239 * in mode and dev=0. 2240 * 2241 * In all the other cases it's implementation defined behavior. 2242 */ 2243 2244 if ((mode & S_IFIFO) && dev == 0) 2245 return do_sys_mkfifoat(l, fdat, pathname, mode); 2246 else 2247 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2248 UIO_USERSPACE); 2249 } 2250 2251 /* 2252 * Create a special file. 2253 */ 2254 /* ARGSUSED */ 2255 int 2256 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2257 register_t *retval) 2258 { 2259 /* { 2260 syscallarg(const char *) path; 2261 syscallarg(mode_t) mode; 2262 syscallarg(dev_t) dev; 2263 } */ 2264 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2265 SCARG(uap, mode), SCARG(uap, dev)); 2266 } 2267 2268 int 2269 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2270 register_t *retval) 2271 { 2272 /* { 2273 syscallarg(int) fd; 2274 syscallarg(const char *) path; 2275 syscallarg(mode_t) mode; 2276 syscallarg(int) pad; 2277 syscallarg(dev_t) dev; 2278 } */ 2279 2280 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2281 SCARG(uap, mode), SCARG(uap, dev)); 2282 } 2283 2284 int 2285 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2286 enum uio_seg seg) 2287 { 2288 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2289 } 2290 2291 int 2292 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2293 dev_t dev, enum uio_seg seg) 2294 { 2295 struct proc *p = l->l_proc; 2296 struct vnode *vp; 2297 struct vattr vattr; 2298 int error, optype; 2299 struct pathbuf *pb; 2300 struct nameidata nd; 2301 const char *pathstring; 2302 2303 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2304 0, NULL, NULL, NULL)) != 0) 2305 return (error); 2306 2307 optype = VOP_MKNOD_DESCOFFSET; 2308 2309 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2310 if (error) { 2311 return error; 2312 } 2313 pathstring = pathbuf_stringcopy_get(pb); 2314 if (pathstring == NULL) { 2315 pathbuf_destroy(pb); 2316 return ENOMEM; 2317 } 2318 2319 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2320 2321 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2322 goto out; 2323 vp = nd.ni_vp; 2324 2325 if (vp != NULL) 2326 error = EEXIST; 2327 else { 2328 vattr_null(&vattr); 2329 /* We will read cwdi->cwdi_cmask unlocked. */ 2330 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2331 vattr.va_rdev = dev; 2332 2333 switch (mode & S_IFMT) { 2334 case S_IFMT: /* used by badsect to flag bad sectors */ 2335 vattr.va_type = VBAD; 2336 break; 2337 case S_IFCHR: 2338 vattr.va_type = VCHR; 2339 break; 2340 case S_IFBLK: 2341 vattr.va_type = VBLK; 2342 break; 2343 case S_IFWHT: 2344 optype = VOP_WHITEOUT_DESCOFFSET; 2345 break; 2346 case S_IFREG: 2347 #if NVERIEXEC > 0 2348 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2349 O_CREAT); 2350 #endif /* NVERIEXEC > 0 */ 2351 vattr.va_type = VREG; 2352 vattr.va_rdev = VNOVAL; 2353 optype = VOP_CREATE_DESCOFFSET; 2354 break; 2355 default: 2356 error = EINVAL; 2357 break; 2358 } 2359 2360 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2361 vattr.va_rdev == VNOVAL) 2362 error = EINVAL; 2363 } 2364 2365 if (!error) { 2366 switch (optype) { 2367 case VOP_WHITEOUT_DESCOFFSET: 2368 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2369 if (error) 2370 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2371 vput(nd.ni_dvp); 2372 break; 2373 2374 case VOP_MKNOD_DESCOFFSET: 2375 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2376 &nd.ni_cnd, &vattr); 2377 if (error == 0) 2378 vrele(nd.ni_vp); 2379 vput(nd.ni_dvp); 2380 break; 2381 2382 case VOP_CREATE_DESCOFFSET: 2383 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2384 &nd.ni_cnd, &vattr); 2385 if (error == 0) 2386 vrele(nd.ni_vp); 2387 vput(nd.ni_dvp); 2388 break; 2389 } 2390 } else { 2391 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2392 if (nd.ni_dvp == vp) 2393 vrele(nd.ni_dvp); 2394 else 2395 vput(nd.ni_dvp); 2396 if (vp) 2397 vrele(vp); 2398 } 2399 out: 2400 pathbuf_stringcopy_put(pb, pathstring); 2401 pathbuf_destroy(pb); 2402 return (error); 2403 } 2404 2405 /* 2406 * Create a named pipe. 2407 */ 2408 /* ARGSUSED */ 2409 int 2410 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2411 { 2412 /* { 2413 syscallarg(const char *) path; 2414 syscallarg(int) mode; 2415 } */ 2416 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2417 } 2418 2419 int 2420 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2421 register_t *retval) 2422 { 2423 /* { 2424 syscallarg(int) fd; 2425 syscallarg(const char *) path; 2426 syscallarg(int) mode; 2427 } */ 2428 2429 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2430 SCARG(uap, mode)); 2431 } 2432 2433 static int 2434 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2435 { 2436 struct proc *p = l->l_proc; 2437 struct vattr vattr; 2438 int error; 2439 struct pathbuf *pb; 2440 struct nameidata nd; 2441 2442 error = pathbuf_copyin(path, &pb); 2443 if (error) { 2444 return error; 2445 } 2446 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2447 2448 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2449 pathbuf_destroy(pb); 2450 return error; 2451 } 2452 if (nd.ni_vp != NULL) { 2453 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2454 if (nd.ni_dvp == nd.ni_vp) 2455 vrele(nd.ni_dvp); 2456 else 2457 vput(nd.ni_dvp); 2458 vrele(nd.ni_vp); 2459 pathbuf_destroy(pb); 2460 return (EEXIST); 2461 } 2462 vattr_null(&vattr); 2463 vattr.va_type = VFIFO; 2464 /* We will read cwdi->cwdi_cmask unlocked. */ 2465 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2466 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2467 if (error == 0) 2468 vrele(nd.ni_vp); 2469 vput(nd.ni_dvp); 2470 pathbuf_destroy(pb); 2471 return (error); 2472 } 2473 2474 /* 2475 * Make a hard file link. 2476 */ 2477 /* ARGSUSED */ 2478 int 2479 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2480 const char *link, int follow, register_t *retval) 2481 { 2482 struct vnode *vp; 2483 struct pathbuf *linkpb; 2484 struct nameidata nd; 2485 namei_simple_flags_t ns_flags; 2486 int error; 2487 2488 if (follow & AT_SYMLINK_FOLLOW) 2489 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2490 else 2491 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2492 2493 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2494 if (error != 0) 2495 return (error); 2496 error = pathbuf_copyin(link, &linkpb); 2497 if (error) { 2498 goto out1; 2499 } 2500 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2501 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2502 goto out2; 2503 if (nd.ni_vp) { 2504 error = EEXIST; 2505 goto abortop; 2506 } 2507 /* Prevent hard links on directories. */ 2508 if (vp->v_type == VDIR) { 2509 error = EPERM; 2510 goto abortop; 2511 } 2512 /* Prevent cross-mount operation. */ 2513 if (nd.ni_dvp->v_mount != vp->v_mount) { 2514 error = EXDEV; 2515 goto abortop; 2516 } 2517 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2518 VOP_UNLOCK(nd.ni_dvp); 2519 vrele(nd.ni_dvp); 2520 out2: 2521 pathbuf_destroy(linkpb); 2522 out1: 2523 vrele(vp); 2524 return (error); 2525 abortop: 2526 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2527 if (nd.ni_dvp == nd.ni_vp) 2528 vrele(nd.ni_dvp); 2529 else 2530 vput(nd.ni_dvp); 2531 if (nd.ni_vp != NULL) 2532 vrele(nd.ni_vp); 2533 goto out2; 2534 } 2535 2536 int 2537 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2538 { 2539 /* { 2540 syscallarg(const char *) path; 2541 syscallarg(const char *) link; 2542 } */ 2543 const char *path = SCARG(uap, path); 2544 const char *link = SCARG(uap, link); 2545 2546 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2547 AT_SYMLINK_FOLLOW, retval); 2548 } 2549 2550 int 2551 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2552 register_t *retval) 2553 { 2554 /* { 2555 syscallarg(int) fd1; 2556 syscallarg(const char *) name1; 2557 syscallarg(int) fd2; 2558 syscallarg(const char *) name2; 2559 syscallarg(int) flags; 2560 } */ 2561 int fd1 = SCARG(uap, fd1); 2562 const char *name1 = SCARG(uap, name1); 2563 int fd2 = SCARG(uap, fd2); 2564 const char *name2 = SCARG(uap, name2); 2565 int follow; 2566 2567 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2568 2569 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2570 } 2571 2572 2573 int 2574 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2575 { 2576 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2577 } 2578 2579 static int 2580 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2581 const char *link, enum uio_seg seg) 2582 { 2583 struct proc *p = curproc; 2584 struct vattr vattr; 2585 char *path; 2586 int error; 2587 size_t len; 2588 struct pathbuf *linkpb; 2589 struct nameidata nd; 2590 2591 KASSERT(l != NULL || fdat == AT_FDCWD); 2592 2593 path = PNBUF_GET(); 2594 if (seg == UIO_USERSPACE) { 2595 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2596 goto out1; 2597 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2598 goto out1; 2599 } else { 2600 len = strlen(patharg) + 1; 2601 KASSERT(len <= MAXPATHLEN); 2602 memcpy(path, patharg, len); 2603 linkpb = pathbuf_create(link); 2604 if (linkpb == NULL) { 2605 error = ENOMEM; 2606 goto out1; 2607 } 2608 } 2609 ktrkuser("symlink-target", path, len - 1); 2610 2611 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2612 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2613 goto out2; 2614 if (nd.ni_vp) { 2615 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2616 if (nd.ni_dvp == nd.ni_vp) 2617 vrele(nd.ni_dvp); 2618 else 2619 vput(nd.ni_dvp); 2620 vrele(nd.ni_vp); 2621 error = EEXIST; 2622 goto out2; 2623 } 2624 vattr_null(&vattr); 2625 vattr.va_type = VLNK; 2626 /* We will read cwdi->cwdi_cmask unlocked. */ 2627 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2628 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2629 if (error == 0) 2630 vrele(nd.ni_vp); 2631 vput(nd.ni_dvp); 2632 out2: 2633 pathbuf_destroy(linkpb); 2634 out1: 2635 PNBUF_PUT(path); 2636 return (error); 2637 } 2638 2639 /* 2640 * Make a symbolic link. 2641 */ 2642 /* ARGSUSED */ 2643 int 2644 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2645 { 2646 /* { 2647 syscallarg(const char *) path; 2648 syscallarg(const char *) link; 2649 } */ 2650 2651 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2652 UIO_USERSPACE); 2653 } 2654 2655 int 2656 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2657 register_t *retval) 2658 { 2659 /* { 2660 syscallarg(const char *) path1; 2661 syscallarg(int) fd; 2662 syscallarg(const char *) path2; 2663 } */ 2664 2665 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2666 SCARG(uap, path2), UIO_USERSPACE); 2667 } 2668 2669 /* 2670 * Delete a whiteout from the filesystem. 2671 */ 2672 /* ARGSUSED */ 2673 int 2674 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2675 { 2676 /* { 2677 syscallarg(const char *) path; 2678 } */ 2679 int error; 2680 struct pathbuf *pb; 2681 struct nameidata nd; 2682 2683 error = pathbuf_copyin(SCARG(uap, path), &pb); 2684 if (error) { 2685 return error; 2686 } 2687 2688 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2689 error = namei(&nd); 2690 if (error) { 2691 pathbuf_destroy(pb); 2692 return (error); 2693 } 2694 2695 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2696 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2697 if (nd.ni_dvp == nd.ni_vp) 2698 vrele(nd.ni_dvp); 2699 else 2700 vput(nd.ni_dvp); 2701 if (nd.ni_vp) 2702 vrele(nd.ni_vp); 2703 pathbuf_destroy(pb); 2704 return (EEXIST); 2705 } 2706 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2707 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2708 vput(nd.ni_dvp); 2709 pathbuf_destroy(pb); 2710 return (error); 2711 } 2712 2713 /* 2714 * Delete a name from the filesystem. 2715 */ 2716 /* ARGSUSED */ 2717 int 2718 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2719 { 2720 /* { 2721 syscallarg(const char *) path; 2722 } */ 2723 2724 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2725 } 2726 2727 int 2728 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2729 register_t *retval) 2730 { 2731 /* { 2732 syscallarg(int) fd; 2733 syscallarg(const char *) path; 2734 syscallarg(int) flag; 2735 } */ 2736 2737 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2738 SCARG(uap, flag), UIO_USERSPACE); 2739 } 2740 2741 int 2742 do_sys_unlink(const char *arg, enum uio_seg seg) 2743 { 2744 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2745 } 2746 2747 static int 2748 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2749 enum uio_seg seg) 2750 { 2751 struct vnode *vp; 2752 int error; 2753 struct pathbuf *pb; 2754 struct nameidata nd; 2755 const char *pathstring; 2756 2757 KASSERT(l != NULL || fdat == AT_FDCWD); 2758 2759 error = pathbuf_maybe_copyin(arg, seg, &pb); 2760 if (error) { 2761 return error; 2762 } 2763 pathstring = pathbuf_stringcopy_get(pb); 2764 if (pathstring == NULL) { 2765 pathbuf_destroy(pb); 2766 return ENOMEM; 2767 } 2768 2769 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2770 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2771 goto out; 2772 vp = nd.ni_vp; 2773 2774 /* 2775 * The root of a mounted filesystem cannot be deleted. 2776 */ 2777 if ((vp->v_vflag & VV_ROOT) != 0) { 2778 error = EBUSY; 2779 goto abort; 2780 } 2781 2782 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2783 error = EBUSY; 2784 goto abort; 2785 } 2786 2787 /* 2788 * No rmdir "." please. 2789 */ 2790 if (nd.ni_dvp == vp) { 2791 error = EINVAL; 2792 goto abort; 2793 } 2794 2795 /* 2796 * AT_REMOVEDIR is required to remove a directory 2797 */ 2798 if (vp->v_type == VDIR) { 2799 if (!(flags & AT_REMOVEDIR)) { 2800 error = EPERM; 2801 goto abort; 2802 } else { 2803 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2804 vput(nd.ni_dvp); 2805 goto out; 2806 } 2807 } 2808 2809 /* 2810 * Starting here we only deal with non directories. 2811 */ 2812 if (flags & AT_REMOVEDIR) { 2813 error = ENOTDIR; 2814 goto abort; 2815 } 2816 2817 #if NVERIEXEC > 0 2818 /* Handle remove requests for veriexec entries. */ 2819 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2820 goto abort; 2821 } 2822 #endif /* NVERIEXEC > 0 */ 2823 2824 #ifdef FILEASSOC 2825 (void)fileassoc_file_delete(vp); 2826 #endif /* FILEASSOC */ 2827 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2828 vput(nd.ni_dvp); 2829 goto out; 2830 2831 abort: 2832 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2833 if (nd.ni_dvp == vp) 2834 vrele(nd.ni_dvp); 2835 else 2836 vput(nd.ni_dvp); 2837 vput(vp); 2838 2839 out: 2840 pathbuf_stringcopy_put(pb, pathstring); 2841 pathbuf_destroy(pb); 2842 return (error); 2843 } 2844 2845 /* 2846 * Reposition read/write file offset. 2847 */ 2848 int 2849 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2850 { 2851 /* { 2852 syscallarg(int) fd; 2853 syscallarg(int) pad; 2854 syscallarg(off_t) offset; 2855 syscallarg(int) whence; 2856 } */ 2857 kauth_cred_t cred = l->l_cred; 2858 file_t *fp; 2859 struct vnode *vp; 2860 struct vattr vattr; 2861 off_t newoff; 2862 int error, fd; 2863 2864 fd = SCARG(uap, fd); 2865 2866 if ((fp = fd_getfile(fd)) == NULL) 2867 return (EBADF); 2868 2869 vp = fp->f_vnode; 2870 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2871 error = ESPIPE; 2872 goto out; 2873 } 2874 2875 vn_lock(vp, LK_SHARED | LK_RETRY); 2876 2877 switch (SCARG(uap, whence)) { 2878 case SEEK_CUR: 2879 newoff = fp->f_offset + SCARG(uap, offset); 2880 break; 2881 case SEEK_END: 2882 error = VOP_GETATTR(vp, &vattr, cred); 2883 if (error) { 2884 VOP_UNLOCK(vp); 2885 goto out; 2886 } 2887 newoff = SCARG(uap, offset) + vattr.va_size; 2888 break; 2889 case SEEK_SET: 2890 newoff = SCARG(uap, offset); 2891 break; 2892 default: 2893 error = EINVAL; 2894 VOP_UNLOCK(vp); 2895 goto out; 2896 } 2897 VOP_UNLOCK(vp); 2898 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2899 *(off_t *)retval = fp->f_offset = newoff; 2900 } 2901 out: 2902 fd_putfile(fd); 2903 return (error); 2904 } 2905 2906 /* 2907 * Positional read system call. 2908 */ 2909 int 2910 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2911 { 2912 /* { 2913 syscallarg(int) fd; 2914 syscallarg(void *) buf; 2915 syscallarg(size_t) nbyte; 2916 syscallarg(off_t) offset; 2917 } */ 2918 file_t *fp; 2919 struct vnode *vp; 2920 off_t offset; 2921 int error, fd = SCARG(uap, fd); 2922 2923 if ((fp = fd_getfile(fd)) == NULL) 2924 return (EBADF); 2925 2926 if ((fp->f_flag & FREAD) == 0) { 2927 fd_putfile(fd); 2928 return (EBADF); 2929 } 2930 2931 vp = fp->f_vnode; 2932 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2933 error = ESPIPE; 2934 goto out; 2935 } 2936 2937 offset = SCARG(uap, offset); 2938 2939 /* 2940 * XXX This works because no file systems actually 2941 * XXX take any action on the seek operation. 2942 */ 2943 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2944 goto out; 2945 2946 /* dofileread() will unuse the descriptor for us */ 2947 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2948 &offset, 0, retval)); 2949 2950 out: 2951 fd_putfile(fd); 2952 return (error); 2953 } 2954 2955 /* 2956 * Positional scatter read system call. 2957 */ 2958 int 2959 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2960 { 2961 /* { 2962 syscallarg(int) fd; 2963 syscallarg(const struct iovec *) iovp; 2964 syscallarg(int) iovcnt; 2965 syscallarg(off_t) offset; 2966 } */ 2967 off_t offset = SCARG(uap, offset); 2968 2969 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2970 SCARG(uap, iovcnt), &offset, 0, retval); 2971 } 2972 2973 /* 2974 * Positional write system call. 2975 */ 2976 int 2977 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2978 { 2979 /* { 2980 syscallarg(int) fd; 2981 syscallarg(const void *) buf; 2982 syscallarg(size_t) nbyte; 2983 syscallarg(off_t) offset; 2984 } */ 2985 file_t *fp; 2986 struct vnode *vp; 2987 off_t offset; 2988 int error, fd = SCARG(uap, fd); 2989 2990 if ((fp = fd_getfile(fd)) == NULL) 2991 return (EBADF); 2992 2993 if ((fp->f_flag & FWRITE) == 0) { 2994 fd_putfile(fd); 2995 return (EBADF); 2996 } 2997 2998 vp = fp->f_vnode; 2999 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 3000 error = ESPIPE; 3001 goto out; 3002 } 3003 3004 offset = SCARG(uap, offset); 3005 3006 /* 3007 * XXX This works because no file systems actually 3008 * XXX take any action on the seek operation. 3009 */ 3010 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 3011 goto out; 3012 3013 /* dofilewrite() will unuse the descriptor for us */ 3014 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3015 &offset, 0, retval)); 3016 3017 out: 3018 fd_putfile(fd); 3019 return (error); 3020 } 3021 3022 /* 3023 * Positional gather write system call. 3024 */ 3025 int 3026 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 3027 { 3028 /* { 3029 syscallarg(int) fd; 3030 syscallarg(const struct iovec *) iovp; 3031 syscallarg(int) iovcnt; 3032 syscallarg(off_t) offset; 3033 } */ 3034 off_t offset = SCARG(uap, offset); 3035 3036 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3037 SCARG(uap, iovcnt), &offset, 0, retval); 3038 } 3039 3040 /* 3041 * Check access permissions. 3042 */ 3043 int 3044 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 3045 { 3046 /* { 3047 syscallarg(const char *) path; 3048 syscallarg(int) flags; 3049 } */ 3050 3051 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3052 SCARG(uap, flags), 0); 3053 } 3054 3055 int 3056 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3057 int mode, int flags) 3058 { 3059 kauth_cred_t cred; 3060 struct vnode *vp; 3061 int error, nd_flag, vmode; 3062 struct pathbuf *pb; 3063 struct nameidata nd; 3064 3065 CTASSERT(F_OK == 0); 3066 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3067 /* nonsense mode */ 3068 return EINVAL; 3069 } 3070 3071 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3072 if (flags & AT_SYMLINK_NOFOLLOW) 3073 nd_flag &= ~FOLLOW; 3074 3075 error = pathbuf_copyin(path, &pb); 3076 if (error) 3077 return error; 3078 3079 NDINIT(&nd, LOOKUP, nd_flag, pb); 3080 3081 /* Override default credentials */ 3082 cred = kauth_cred_dup(l->l_cred); 3083 if (!(flags & AT_EACCESS)) { 3084 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3085 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3086 } 3087 nd.ni_cnd.cn_cred = cred; 3088 3089 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3090 pathbuf_destroy(pb); 3091 goto out; 3092 } 3093 vp = nd.ni_vp; 3094 pathbuf_destroy(pb); 3095 3096 /* Flags == 0 means only check for existence. */ 3097 if (mode) { 3098 vmode = 0; 3099 if (mode & R_OK) 3100 vmode |= VREAD; 3101 if (mode & W_OK) 3102 vmode |= VWRITE; 3103 if (mode & X_OK) 3104 vmode |= VEXEC; 3105 3106 error = VOP_ACCESS(vp, vmode, cred); 3107 if (!error && (vmode & VWRITE)) 3108 error = vn_writechk(vp); 3109 } 3110 vput(vp); 3111 out: 3112 kauth_cred_free(cred); 3113 return (error); 3114 } 3115 3116 int 3117 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3118 register_t *retval) 3119 { 3120 /* { 3121 syscallarg(int) fd; 3122 syscallarg(const char *) path; 3123 syscallarg(int) amode; 3124 syscallarg(int) flag; 3125 } */ 3126 3127 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3128 SCARG(uap, amode), SCARG(uap, flag)); 3129 } 3130 3131 /* 3132 * Common code for all sys_stat functions, including compat versions. 3133 */ 3134 int 3135 do_sys_stat(const char *userpath, unsigned int nd_flag, 3136 struct stat *sb) 3137 { 3138 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3139 } 3140 3141 int 3142 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3143 unsigned int nd_flag, struct stat *sb) 3144 { 3145 int error; 3146 struct pathbuf *pb; 3147 struct nameidata nd; 3148 3149 KASSERT(l != NULL || fdat == AT_FDCWD); 3150 3151 error = pathbuf_copyin(userpath, &pb); 3152 if (error) { 3153 return error; 3154 } 3155 3156 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3157 3158 error = fd_nameiat(l, fdat, &nd); 3159 if (error != 0) { 3160 pathbuf_destroy(pb); 3161 return error; 3162 } 3163 error = vn_stat(nd.ni_vp, sb); 3164 vput(nd.ni_vp); 3165 pathbuf_destroy(pb); 3166 return error; 3167 } 3168 3169 /* 3170 * Get file status; this version follows links. 3171 */ 3172 /* ARGSUSED */ 3173 int 3174 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3175 { 3176 /* { 3177 syscallarg(const char *) path; 3178 syscallarg(struct stat *) ub; 3179 } */ 3180 struct stat sb; 3181 int error; 3182 3183 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3184 if (error) 3185 return error; 3186 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3187 } 3188 3189 /* 3190 * Get file status; this version does not follow links. 3191 */ 3192 /* ARGSUSED */ 3193 int 3194 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3195 { 3196 /* { 3197 syscallarg(const char *) path; 3198 syscallarg(struct stat *) ub; 3199 } */ 3200 struct stat sb; 3201 int error; 3202 3203 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3204 if (error) 3205 return error; 3206 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3207 } 3208 3209 int 3210 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3211 register_t *retval) 3212 { 3213 /* { 3214 syscallarg(int) fd; 3215 syscallarg(const char *) path; 3216 syscallarg(struct stat *) buf; 3217 syscallarg(int) flag; 3218 } */ 3219 unsigned int nd_flag; 3220 struct stat sb; 3221 int error; 3222 3223 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3224 nd_flag = NOFOLLOW; 3225 else 3226 nd_flag = FOLLOW; 3227 3228 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3229 &sb); 3230 if (error) 3231 return error; 3232 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3233 } 3234 3235 static int 3236 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3237 { 3238 int error; 3239 struct pathbuf *pb; 3240 struct nameidata nd; 3241 3242 error = pathbuf_copyin(path, &pb); 3243 if (error) { 3244 return error; 3245 } 3246 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3247 if ((error = namei(&nd)) != 0) { 3248 pathbuf_destroy(pb); 3249 return error; 3250 } 3251 error = VOP_PATHCONF(nd.ni_vp, name, retval); 3252 vput(nd.ni_vp); 3253 pathbuf_destroy(pb); 3254 return error; 3255 } 3256 3257 /* 3258 * Get configurable pathname variables. 3259 */ 3260 /* ARGSUSED */ 3261 int 3262 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3263 register_t *retval) 3264 { 3265 /* { 3266 syscallarg(const char *) path; 3267 syscallarg(int) name; 3268 } */ 3269 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3270 FOLLOW); 3271 } 3272 3273 /* ARGSUSED */ 3274 int 3275 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3276 register_t *retval) 3277 { 3278 /* { 3279 syscallarg(const char *) path; 3280 syscallarg(int) name; 3281 } */ 3282 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3283 NOFOLLOW); 3284 } 3285 3286 /* 3287 * Return target name of a symbolic link. 3288 */ 3289 /* ARGSUSED */ 3290 int 3291 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3292 register_t *retval) 3293 { 3294 /* { 3295 syscallarg(const char *) path; 3296 syscallarg(char *) buf; 3297 syscallarg(size_t) count; 3298 } */ 3299 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3300 SCARG(uap, buf), SCARG(uap, count), retval); 3301 } 3302 3303 static int 3304 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3305 size_t count, register_t *retval) 3306 { 3307 struct vnode *vp; 3308 struct iovec aiov; 3309 struct uio auio; 3310 int error; 3311 struct pathbuf *pb; 3312 struct nameidata nd; 3313 3314 error = pathbuf_copyin(path, &pb); 3315 if (error) { 3316 return error; 3317 } 3318 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 3319 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3320 pathbuf_destroy(pb); 3321 return error; 3322 } 3323 vp = nd.ni_vp; 3324 pathbuf_destroy(pb); 3325 if (vp->v_type != VLNK) 3326 error = EINVAL; 3327 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3328 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3329 aiov.iov_base = buf; 3330 aiov.iov_len = count; 3331 auio.uio_iov = &aiov; 3332 auio.uio_iovcnt = 1; 3333 auio.uio_offset = 0; 3334 auio.uio_rw = UIO_READ; 3335 KASSERT(l == curlwp); 3336 auio.uio_vmspace = l->l_proc->p_vmspace; 3337 auio.uio_resid = count; 3338 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3339 *retval = count - auio.uio_resid; 3340 } 3341 vput(vp); 3342 return (error); 3343 } 3344 3345 int 3346 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3347 register_t *retval) 3348 { 3349 /* { 3350 syscallarg(int) fd; 3351 syscallarg(const char *) path; 3352 syscallarg(char *) buf; 3353 syscallarg(size_t) bufsize; 3354 } */ 3355 3356 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3357 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3358 } 3359 3360 /* 3361 * Change flags of a file given a path name. 3362 */ 3363 /* ARGSUSED */ 3364 int 3365 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3366 { 3367 /* { 3368 syscallarg(const char *) path; 3369 syscallarg(u_long) flags; 3370 } */ 3371 struct vnode *vp; 3372 int error; 3373 3374 error = namei_simple_user(SCARG(uap, path), 3375 NSM_FOLLOW_TRYEMULROOT, &vp); 3376 if (error != 0) 3377 return (error); 3378 error = change_flags(vp, SCARG(uap, flags), l); 3379 vput(vp); 3380 return (error); 3381 } 3382 3383 /* 3384 * Change flags of a file given a file descriptor. 3385 */ 3386 /* ARGSUSED */ 3387 int 3388 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3389 { 3390 /* { 3391 syscallarg(int) fd; 3392 syscallarg(u_long) flags; 3393 } */ 3394 struct vnode *vp; 3395 file_t *fp; 3396 int error; 3397 3398 /* fd_getvnode() will use the descriptor for us */ 3399 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3400 return (error); 3401 vp = fp->f_vnode; 3402 error = change_flags(vp, SCARG(uap, flags), l); 3403 VOP_UNLOCK(vp); 3404 fd_putfile(SCARG(uap, fd)); 3405 return (error); 3406 } 3407 3408 /* 3409 * Change flags of a file given a path name; this version does 3410 * not follow links. 3411 */ 3412 int 3413 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3414 { 3415 /* { 3416 syscallarg(const char *) path; 3417 syscallarg(u_long) flags; 3418 } */ 3419 struct vnode *vp; 3420 int error; 3421 3422 error = namei_simple_user(SCARG(uap, path), 3423 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3424 if (error != 0) 3425 return (error); 3426 error = change_flags(vp, SCARG(uap, flags), l); 3427 vput(vp); 3428 return (error); 3429 } 3430 3431 /* 3432 * Common routine to change flags of a file. 3433 */ 3434 int 3435 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3436 { 3437 struct vattr vattr; 3438 int error; 3439 3440 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3441 3442 vattr_null(&vattr); 3443 vattr.va_flags = flags; 3444 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3445 3446 return (error); 3447 } 3448 3449 /* 3450 * Change mode of a file given path name; this version follows links. 3451 */ 3452 /* ARGSUSED */ 3453 int 3454 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3455 { 3456 /* { 3457 syscallarg(const char *) path; 3458 syscallarg(int) mode; 3459 } */ 3460 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3461 SCARG(uap, mode), 0); 3462 } 3463 3464 int 3465 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3466 { 3467 int error; 3468 struct vnode *vp; 3469 namei_simple_flags_t ns_flag; 3470 3471 if (flags & AT_SYMLINK_NOFOLLOW) 3472 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3473 else 3474 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3475 3476 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3477 if (error != 0) 3478 return error; 3479 3480 error = change_mode(vp, mode, l); 3481 3482 vrele(vp); 3483 3484 return (error); 3485 } 3486 3487 /* 3488 * Change mode of a file given a file descriptor. 3489 */ 3490 /* ARGSUSED */ 3491 int 3492 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3493 { 3494 /* { 3495 syscallarg(int) fd; 3496 syscallarg(int) mode; 3497 } */ 3498 file_t *fp; 3499 int error; 3500 3501 /* fd_getvnode() will use the descriptor for us */ 3502 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3503 return (error); 3504 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3505 fd_putfile(SCARG(uap, fd)); 3506 return (error); 3507 } 3508 3509 int 3510 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3511 register_t *retval) 3512 { 3513 /* { 3514 syscallarg(int) fd; 3515 syscallarg(const char *) path; 3516 syscallarg(int) mode; 3517 syscallarg(int) flag; 3518 } */ 3519 3520 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3521 SCARG(uap, mode), SCARG(uap, flag)); 3522 } 3523 3524 /* 3525 * Change mode of a file given path name; this version does not follow links. 3526 */ 3527 /* ARGSUSED */ 3528 int 3529 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3530 { 3531 /* { 3532 syscallarg(const char *) path; 3533 syscallarg(int) mode; 3534 } */ 3535 int error; 3536 struct vnode *vp; 3537 3538 error = namei_simple_user(SCARG(uap, path), 3539 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3540 if (error != 0) 3541 return (error); 3542 3543 error = change_mode(vp, SCARG(uap, mode), l); 3544 3545 vrele(vp); 3546 return (error); 3547 } 3548 3549 /* 3550 * Common routine to set mode given a vnode. 3551 */ 3552 static int 3553 change_mode(struct vnode *vp, int mode, struct lwp *l) 3554 { 3555 struct vattr vattr; 3556 int error; 3557 3558 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3559 vattr_null(&vattr); 3560 vattr.va_mode = mode & ALLPERMS; 3561 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3562 VOP_UNLOCK(vp); 3563 return (error); 3564 } 3565 3566 /* 3567 * Set ownership given a path name; this version follows links. 3568 */ 3569 /* ARGSUSED */ 3570 int 3571 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3572 { 3573 /* { 3574 syscallarg(const char *) path; 3575 syscallarg(uid_t) uid; 3576 syscallarg(gid_t) gid; 3577 } */ 3578 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3579 SCARG(uap, gid), 0); 3580 } 3581 3582 int 3583 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3584 gid_t gid, int flags) 3585 { 3586 int error; 3587 struct vnode *vp; 3588 namei_simple_flags_t ns_flag; 3589 3590 if (flags & AT_SYMLINK_NOFOLLOW) 3591 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3592 else 3593 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3594 3595 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3596 if (error != 0) 3597 return error; 3598 3599 error = change_owner(vp, uid, gid, l, 0); 3600 3601 vrele(vp); 3602 3603 return (error); 3604 } 3605 3606 /* 3607 * Set ownership given a path name; this version follows links. 3608 * Provides POSIX semantics. 3609 */ 3610 /* ARGSUSED */ 3611 int 3612 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3613 { 3614 /* { 3615 syscallarg(const char *) path; 3616 syscallarg(uid_t) uid; 3617 syscallarg(gid_t) gid; 3618 } */ 3619 int error; 3620 struct vnode *vp; 3621 3622 error = namei_simple_user(SCARG(uap, path), 3623 NSM_FOLLOW_TRYEMULROOT, &vp); 3624 if (error != 0) 3625 return (error); 3626 3627 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3628 3629 vrele(vp); 3630 return (error); 3631 } 3632 3633 /* 3634 * Set ownership given a file descriptor. 3635 */ 3636 /* ARGSUSED */ 3637 int 3638 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3639 { 3640 /* { 3641 syscallarg(int) fd; 3642 syscallarg(uid_t) uid; 3643 syscallarg(gid_t) gid; 3644 } */ 3645 int error; 3646 file_t *fp; 3647 3648 /* fd_getvnode() will use the descriptor for us */ 3649 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3650 return (error); 3651 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3652 l, 0); 3653 fd_putfile(SCARG(uap, fd)); 3654 return (error); 3655 } 3656 3657 int 3658 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3659 register_t *retval) 3660 { 3661 /* { 3662 syscallarg(int) fd; 3663 syscallarg(const char *) path; 3664 syscallarg(uid_t) owner; 3665 syscallarg(gid_t) group; 3666 syscallarg(int) flag; 3667 } */ 3668 3669 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3670 SCARG(uap, owner), SCARG(uap, group), 3671 SCARG(uap, flag)); 3672 } 3673 3674 /* 3675 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3676 */ 3677 /* ARGSUSED */ 3678 int 3679 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3680 { 3681 /* { 3682 syscallarg(int) fd; 3683 syscallarg(uid_t) uid; 3684 syscallarg(gid_t) gid; 3685 } */ 3686 int error; 3687 file_t *fp; 3688 3689 /* fd_getvnode() will use the descriptor for us */ 3690 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3691 return (error); 3692 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3693 l, 1); 3694 fd_putfile(SCARG(uap, fd)); 3695 return (error); 3696 } 3697 3698 /* 3699 * Set ownership given a path name; this version does not follow links. 3700 */ 3701 /* ARGSUSED */ 3702 int 3703 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3704 { 3705 /* { 3706 syscallarg(const char *) path; 3707 syscallarg(uid_t) uid; 3708 syscallarg(gid_t) gid; 3709 } */ 3710 int error; 3711 struct vnode *vp; 3712 3713 error = namei_simple_user(SCARG(uap, path), 3714 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3715 if (error != 0) 3716 return (error); 3717 3718 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3719 3720 vrele(vp); 3721 return (error); 3722 } 3723 3724 /* 3725 * Set ownership given a path name; this version does not follow links. 3726 * Provides POSIX/XPG semantics. 3727 */ 3728 /* ARGSUSED */ 3729 int 3730 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3731 { 3732 /* { 3733 syscallarg(const char *) path; 3734 syscallarg(uid_t) uid; 3735 syscallarg(gid_t) gid; 3736 } */ 3737 int error; 3738 struct vnode *vp; 3739 3740 error = namei_simple_user(SCARG(uap, path), 3741 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3742 if (error != 0) 3743 return (error); 3744 3745 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3746 3747 vrele(vp); 3748 return (error); 3749 } 3750 3751 /* 3752 * Common routine to set ownership given a vnode. 3753 */ 3754 static int 3755 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3756 int posix_semantics) 3757 { 3758 struct vattr vattr; 3759 mode_t newmode; 3760 int error; 3761 3762 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3763 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3764 goto out; 3765 3766 #define CHANGED(x) ((int)(x) != -1) 3767 newmode = vattr.va_mode; 3768 if (posix_semantics) { 3769 /* 3770 * POSIX/XPG semantics: if the caller is not the super-user, 3771 * clear set-user-id and set-group-id bits. Both POSIX and 3772 * the XPG consider the behaviour for calls by the super-user 3773 * implementation-defined; we leave the set-user-id and set- 3774 * group-id settings intact in that case. 3775 */ 3776 if (vattr.va_mode & S_ISUID) { 3777 if (kauth_authorize_vnode(l->l_cred, 3778 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3779 newmode &= ~S_ISUID; 3780 } 3781 if (vattr.va_mode & S_ISGID) { 3782 if (kauth_authorize_vnode(l->l_cred, 3783 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3784 newmode &= ~S_ISGID; 3785 } 3786 } else { 3787 /* 3788 * NetBSD semantics: when changing owner and/or group, 3789 * clear the respective bit(s). 3790 */ 3791 if (CHANGED(uid)) 3792 newmode &= ~S_ISUID; 3793 if (CHANGED(gid)) 3794 newmode &= ~S_ISGID; 3795 } 3796 /* Update va_mode iff altered. */ 3797 if (vattr.va_mode == newmode) 3798 newmode = VNOVAL; 3799 3800 vattr_null(&vattr); 3801 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3802 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3803 vattr.va_mode = newmode; 3804 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3805 #undef CHANGED 3806 3807 out: 3808 VOP_UNLOCK(vp); 3809 return (error); 3810 } 3811 3812 /* 3813 * Set the access and modification times given a path name; this 3814 * version follows links. 3815 */ 3816 /* ARGSUSED */ 3817 int 3818 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3819 register_t *retval) 3820 { 3821 /* { 3822 syscallarg(const char *) path; 3823 syscallarg(const struct timeval *) tptr; 3824 } */ 3825 3826 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3827 SCARG(uap, tptr), UIO_USERSPACE); 3828 } 3829 3830 /* 3831 * Set the access and modification times given a file descriptor. 3832 */ 3833 /* ARGSUSED */ 3834 int 3835 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3836 register_t *retval) 3837 { 3838 /* { 3839 syscallarg(int) fd; 3840 syscallarg(const struct timeval *) tptr; 3841 } */ 3842 int error; 3843 file_t *fp; 3844 3845 /* fd_getvnode() will use the descriptor for us */ 3846 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3847 return (error); 3848 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3849 UIO_USERSPACE); 3850 fd_putfile(SCARG(uap, fd)); 3851 return (error); 3852 } 3853 3854 int 3855 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3856 register_t *retval) 3857 { 3858 /* { 3859 syscallarg(int) fd; 3860 syscallarg(const struct timespec *) tptr; 3861 } */ 3862 int error; 3863 file_t *fp; 3864 3865 /* fd_getvnode() will use the descriptor for us */ 3866 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3867 return (error); 3868 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3869 SCARG(uap, tptr), UIO_USERSPACE); 3870 fd_putfile(SCARG(uap, fd)); 3871 return (error); 3872 } 3873 3874 /* 3875 * Set the access and modification times given a path name; this 3876 * version does not follow links. 3877 */ 3878 int 3879 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3880 register_t *retval) 3881 { 3882 /* { 3883 syscallarg(const char *) path; 3884 syscallarg(const struct timeval *) tptr; 3885 } */ 3886 3887 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3888 SCARG(uap, tptr), UIO_USERSPACE); 3889 } 3890 3891 int 3892 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3893 register_t *retval) 3894 { 3895 /* { 3896 syscallarg(int) fd; 3897 syscallarg(const char *) path; 3898 syscallarg(const struct timespec *) tptr; 3899 syscallarg(int) flag; 3900 } */ 3901 int follow; 3902 const struct timespec *tptr; 3903 int error; 3904 3905 tptr = SCARG(uap, tptr); 3906 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3907 3908 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3909 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3910 3911 return error; 3912 } 3913 3914 /* 3915 * Common routine to set access and modification times given a vnode. 3916 */ 3917 int 3918 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3919 const struct timespec *tptr, enum uio_seg seg) 3920 { 3921 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3922 } 3923 3924 int 3925 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3926 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3927 { 3928 struct vattr vattr; 3929 int error, dorele = 0; 3930 namei_simple_flags_t sflags; 3931 bool vanull, setbirthtime; 3932 struct timespec ts[2]; 3933 3934 KASSERT(l != NULL || fdat == AT_FDCWD); 3935 3936 /* 3937 * I have checked all callers and they pass either FOLLOW, 3938 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3939 * is 0. More to the point, they don't pass anything else. 3940 * Let's keep it that way at least until the namei interfaces 3941 * are fully sanitized. 3942 */ 3943 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3944 sflags = (flag == FOLLOW) ? 3945 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3946 3947 if (tptr == NULL) { 3948 vanull = true; 3949 nanotime(&ts[0]); 3950 ts[1] = ts[0]; 3951 } else { 3952 vanull = false; 3953 if (seg != UIO_SYSSPACE) { 3954 error = copyin(tptr, ts, sizeof (ts)); 3955 if (error != 0) 3956 return error; 3957 } else { 3958 ts[0] = tptr[0]; 3959 ts[1] = tptr[1]; 3960 } 3961 } 3962 3963 if (ts[0].tv_nsec == UTIME_NOW) { 3964 nanotime(&ts[0]); 3965 if (ts[1].tv_nsec == UTIME_NOW) { 3966 vanull = true; 3967 ts[1] = ts[0]; 3968 } 3969 } else if (ts[1].tv_nsec == UTIME_NOW) 3970 nanotime(&ts[1]); 3971 3972 if (vp == NULL) { 3973 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3974 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3975 if (error != 0) 3976 return error; 3977 dorele = 1; 3978 } 3979 3980 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3981 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3982 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3983 vattr_null(&vattr); 3984 3985 if (ts[0].tv_nsec != UTIME_OMIT) 3986 vattr.va_atime = ts[0]; 3987 3988 if (ts[1].tv_nsec != UTIME_OMIT) { 3989 vattr.va_mtime = ts[1]; 3990 if (setbirthtime) 3991 vattr.va_birthtime = ts[1]; 3992 } 3993 3994 if (vanull) 3995 vattr.va_vaflags |= VA_UTIMES_NULL; 3996 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3997 VOP_UNLOCK(vp); 3998 3999 if (dorele != 0) 4000 vrele(vp); 4001 4002 return error; 4003 } 4004 4005 int 4006 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4007 const struct timeval *tptr, enum uio_seg seg) 4008 { 4009 struct timespec ts[2]; 4010 struct timespec *tsptr = NULL; 4011 int error; 4012 4013 if (tptr != NULL) { 4014 struct timeval tv[2]; 4015 4016 if (seg != UIO_SYSSPACE) { 4017 error = copyin(tptr, tv, sizeof(tv)); 4018 if (error != 0) 4019 return error; 4020 tptr = tv; 4021 } 4022 4023 if ((tptr[0].tv_usec == UTIME_NOW) || 4024 (tptr[0].tv_usec == UTIME_OMIT)) 4025 ts[0].tv_nsec = tptr[0].tv_usec; 4026 else { 4027 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4028 return EINVAL; 4029 4030 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4031 } 4032 4033 if ((tptr[1].tv_usec == UTIME_NOW) || 4034 (tptr[1].tv_usec == UTIME_OMIT)) 4035 ts[1].tv_nsec = tptr[1].tv_usec; 4036 else { 4037 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4038 return EINVAL; 4039 4040 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4041 } 4042 4043 tsptr = &ts[0]; 4044 } 4045 4046 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4047 } 4048 4049 /* 4050 * Truncate a file given its path name. 4051 */ 4052 /* ARGSUSED */ 4053 int 4054 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 4055 { 4056 /* { 4057 syscallarg(const char *) path; 4058 syscallarg(int) pad; 4059 syscallarg(off_t) length; 4060 } */ 4061 struct vnode *vp; 4062 struct vattr vattr; 4063 int error; 4064 4065 if (SCARG(uap, length) < 0) 4066 return EINVAL; 4067 4068 error = namei_simple_user(SCARG(uap, path), 4069 NSM_FOLLOW_TRYEMULROOT, &vp); 4070 if (error != 0) 4071 return (error); 4072 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4073 if (vp->v_type == VDIR) 4074 error = EISDIR; 4075 else if ((error = vn_writechk(vp)) == 0 && 4076 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4077 vattr_null(&vattr); 4078 vattr.va_size = SCARG(uap, length); 4079 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4080 } 4081 vput(vp); 4082 return (error); 4083 } 4084 4085 /* 4086 * Truncate a file given a file descriptor. 4087 */ 4088 /* ARGSUSED */ 4089 int 4090 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 4091 { 4092 /* { 4093 syscallarg(int) fd; 4094 syscallarg(int) pad; 4095 syscallarg(off_t) length; 4096 } */ 4097 struct vattr vattr; 4098 struct vnode *vp; 4099 file_t *fp; 4100 int error; 4101 4102 if (SCARG(uap, length) < 0) 4103 return EINVAL; 4104 4105 /* fd_getvnode() will use the descriptor for us */ 4106 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4107 return (error); 4108 if ((fp->f_flag & FWRITE) == 0) { 4109 error = EINVAL; 4110 goto out; 4111 } 4112 vp = fp->f_vnode; 4113 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4114 if (vp->v_type == VDIR) 4115 error = EISDIR; 4116 else if ((error = vn_writechk(vp)) == 0) { 4117 vattr_null(&vattr); 4118 vattr.va_size = SCARG(uap, length); 4119 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 4120 } 4121 VOP_UNLOCK(vp); 4122 out: 4123 fd_putfile(SCARG(uap, fd)); 4124 return (error); 4125 } 4126 4127 /* 4128 * Sync an open file. 4129 */ 4130 /* ARGSUSED */ 4131 int 4132 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4133 { 4134 /* { 4135 syscallarg(int) fd; 4136 } */ 4137 struct vnode *vp; 4138 file_t *fp; 4139 int error; 4140 4141 /* fd_getvnode() will use the descriptor for us */ 4142 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4143 return (error); 4144 vp = fp->f_vnode; 4145 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4146 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4147 VOP_UNLOCK(vp); 4148 fd_putfile(SCARG(uap, fd)); 4149 return (error); 4150 } 4151 4152 /* 4153 * Sync a range of file data. API modeled after that found in AIX. 4154 * 4155 * FDATASYNC indicates that we need only save enough metadata to be able 4156 * to re-read the written data. 4157 */ 4158 /* ARGSUSED */ 4159 int 4160 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4161 { 4162 /* { 4163 syscallarg(int) fd; 4164 syscallarg(int) flags; 4165 syscallarg(off_t) start; 4166 syscallarg(off_t) length; 4167 } */ 4168 struct vnode *vp; 4169 file_t *fp; 4170 int flags, nflags; 4171 off_t s, e, len; 4172 int error; 4173 4174 /* fd_getvnode() will use the descriptor for us */ 4175 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4176 return (error); 4177 4178 if ((fp->f_flag & FWRITE) == 0) { 4179 error = EBADF; 4180 goto out; 4181 } 4182 4183 flags = SCARG(uap, flags); 4184 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4185 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4186 error = EINVAL; 4187 goto out; 4188 } 4189 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4190 if (flags & FDATASYNC) 4191 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4192 else 4193 nflags = FSYNC_WAIT; 4194 if (flags & FDISKSYNC) 4195 nflags |= FSYNC_CACHE; 4196 4197 len = SCARG(uap, length); 4198 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4199 if (len) { 4200 s = SCARG(uap, start); 4201 e = s + len; 4202 if (e < s) { 4203 error = EINVAL; 4204 goto out; 4205 } 4206 } else { 4207 e = 0; 4208 s = 0; 4209 } 4210 4211 vp = fp->f_vnode; 4212 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4213 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4214 VOP_UNLOCK(vp); 4215 out: 4216 fd_putfile(SCARG(uap, fd)); 4217 return (error); 4218 } 4219 4220 /* 4221 * Sync the data of an open file. 4222 */ 4223 /* ARGSUSED */ 4224 int 4225 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4226 { 4227 /* { 4228 syscallarg(int) fd; 4229 } */ 4230 struct vnode *vp; 4231 file_t *fp; 4232 int error; 4233 4234 /* fd_getvnode() will use the descriptor for us */ 4235 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4236 return (error); 4237 vp = fp->f_vnode; 4238 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4239 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4240 VOP_UNLOCK(vp); 4241 fd_putfile(SCARG(uap, fd)); 4242 return (error); 4243 } 4244 4245 /* 4246 * Rename files, (standard) BSD semantics frontend. 4247 */ 4248 /* ARGSUSED */ 4249 int 4250 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4251 { 4252 /* { 4253 syscallarg(const char *) from; 4254 syscallarg(const char *) to; 4255 } */ 4256 4257 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4258 SCARG(uap, to), UIO_USERSPACE, 0)); 4259 } 4260 4261 int 4262 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4263 register_t *retval) 4264 { 4265 /* { 4266 syscallarg(int) fromfd; 4267 syscallarg(const char *) from; 4268 syscallarg(int) tofd; 4269 syscallarg(const char *) to; 4270 } */ 4271 4272 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4273 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4274 } 4275 4276 /* 4277 * Rename files, POSIX semantics frontend. 4278 */ 4279 /* ARGSUSED */ 4280 int 4281 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4282 { 4283 /* { 4284 syscallarg(const char *) from; 4285 syscallarg(const char *) to; 4286 } */ 4287 4288 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4289 SCARG(uap, to), UIO_USERSPACE, 1)); 4290 } 4291 4292 /* 4293 * Rename files. Source and destination must either both be directories, 4294 * or both not be directories. If target is a directory, it must be empty. 4295 * If `from' and `to' refer to the same object, the value of the `retain' 4296 * argument is used to determine whether `from' will be 4297 * 4298 * (retain == 0) deleted unless `from' and `to' refer to the same 4299 * object in the file system's name space (BSD). 4300 * (retain == 1) always retained (POSIX). 4301 * 4302 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4303 */ 4304 int 4305 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4306 { 4307 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4308 } 4309 4310 static int 4311 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4312 const char *to, enum uio_seg seg, int retain) 4313 { 4314 struct pathbuf *fpb, *tpb; 4315 struct nameidata fnd, tnd; 4316 struct vnode *fdvp, *fvp; 4317 struct vnode *tdvp, *tvp; 4318 struct mount *mp, *tmp; 4319 int error; 4320 4321 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4322 4323 error = pathbuf_maybe_copyin(from, seg, &fpb); 4324 if (error) 4325 goto out0; 4326 KASSERT(fpb != NULL); 4327 4328 error = pathbuf_maybe_copyin(to, seg, &tpb); 4329 if (error) 4330 goto out1; 4331 KASSERT(tpb != NULL); 4332 4333 /* 4334 * Lookup from. 4335 * 4336 * XXX LOCKPARENT is wrong because we don't actually want it 4337 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4338 * insane, so for the time being we need to leave it like this. 4339 */ 4340 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4341 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4342 goto out2; 4343 4344 /* 4345 * Pull out the important results of the lookup, fdvp and fvp. 4346 * Of course, fvp is bogus because we're about to unlock fdvp. 4347 */ 4348 fdvp = fnd.ni_dvp; 4349 fvp = fnd.ni_vp; 4350 mp = fdvp->v_mount; 4351 KASSERT(fdvp != NULL); 4352 KASSERT(fvp != NULL); 4353 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4354 /* 4355 * Bracket the operation with fstrans_start()/fstrans_done(). 4356 * 4357 * Inside the bracket this file system cannot be unmounted so 4358 * a vnode on this file system cannot change its v_mount. 4359 * A vnode on another file system may still change to dead mount. 4360 */ 4361 fstrans_start(mp); 4362 4363 /* 4364 * Make sure neither fdvp nor fvp is locked. 4365 */ 4366 if (fdvp != fvp) 4367 VOP_UNLOCK(fdvp); 4368 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4369 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4370 4371 /* 4372 * Reject renaming `.' and `..'. Can't do this until after 4373 * namei because we need namei's parsing to find the final 4374 * component name. (namei should just leave us with the final 4375 * component name and not look it up itself, but anyway...) 4376 * 4377 * This was here before because we used to relookup from 4378 * instead of to and relookup requires the caller to check 4379 * this, but now file systems may depend on this check, so we 4380 * must retain it until the file systems are all rototilled. 4381 */ 4382 if (((fnd.ni_cnd.cn_namelen == 1) && 4383 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4384 ((fnd.ni_cnd.cn_namelen == 2) && 4385 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4386 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4387 error = EINVAL; /* XXX EISDIR? */ 4388 goto abort0; 4389 } 4390 4391 /* 4392 * Lookup to. 4393 * 4394 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4395 * fvp here to decide whether to add CREATEDIR is a load of 4396 * bollocks because fvp might be the wrong node by now, since 4397 * fdvp is unlocked. 4398 * 4399 * XXX Why not pass CREATEDIR always? 4400 */ 4401 NDINIT(&tnd, RENAME, 4402 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4403 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4404 tpb); 4405 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4406 goto abort0; 4407 4408 /* 4409 * Pull out the important results of the lookup, tdvp and tvp. 4410 * Of course, tvp is bogus because we're about to unlock tdvp. 4411 */ 4412 tdvp = tnd.ni_dvp; 4413 tvp = tnd.ni_vp; 4414 KASSERT(tdvp != NULL); 4415 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4416 4417 if (fvp->v_type == VDIR) 4418 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4419 /* 4420 * Make sure neither tdvp nor tvp is locked. 4421 */ 4422 if (tdvp != tvp) 4423 VOP_UNLOCK(tdvp); 4424 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4425 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4426 4427 /* 4428 * Reject renaming onto `.' or `..'. relookup is unhappy with 4429 * these, which is why we must do this here. Once upon a time 4430 * we relooked up from instead of to, and consequently didn't 4431 * need this check, but now that we relookup to instead of 4432 * from, we need this; and we shall need it forever forward 4433 * until the VOP_RENAME protocol changes, because file systems 4434 * will no doubt begin to depend on this check. 4435 */ 4436 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4437 error = EISDIR; 4438 goto abort1; 4439 } 4440 if ((tnd.ni_cnd.cn_namelen == 2) && 4441 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4442 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4443 error = EINVAL; 4444 goto abort1; 4445 } 4446 4447 /* 4448 * Make sure the mount points match. Although we don't hold 4449 * any vnode locks, the v_mount on fdvp file system are stable. 4450 * 4451 * Unmounting another file system at an inopportune moment may 4452 * cause tdvp to disappear and change its v_mount to dead. 4453 * 4454 * So in either case different v_mount means cross-device rename. 4455 */ 4456 KASSERT(mp != NULL); 4457 tmp = tdvp->v_mount; 4458 4459 if (mp != tmp) { 4460 error = EXDEV; 4461 goto abort1; 4462 } 4463 4464 /* 4465 * Take the vfs rename lock to avoid cross-directory screw cases. 4466 * Nothing is locked currently, so taking this lock is safe. 4467 */ 4468 error = VFS_RENAMELOCK_ENTER(mp); 4469 if (error) 4470 goto abort1; 4471 4472 /* 4473 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4474 * and nothing is locked except for the vfs rename lock. 4475 * 4476 * The next step is a little rain dance to conform to the 4477 * insane lock protocol, even though it does nothing to ward 4478 * off race conditions. 4479 * 4480 * We need tdvp and tvp to be locked. However, because we have 4481 * unlocked tdvp in order to hold no locks while we take the 4482 * vfs rename lock, tvp may be wrong here, and we can't safely 4483 * lock it even if the sensible file systems will just unlock 4484 * it straight away. Consequently, we must lock tdvp and then 4485 * relookup tvp to get it locked. 4486 * 4487 * Finally, because the VOP_RENAME protocol is brain-damaged 4488 * and various file systems insanely depend on the semantics of 4489 * this brain damage, the lookup of to must be the last lookup 4490 * before VOP_RENAME. 4491 */ 4492 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4493 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4494 if (error) 4495 goto abort2; 4496 4497 /* 4498 * Drop the old tvp and pick up the new one -- which might be 4499 * the same, but that doesn't matter to us. After this, tdvp 4500 * and tvp should both be locked. 4501 */ 4502 if (tvp != NULL) 4503 vrele(tvp); 4504 tvp = tnd.ni_vp; 4505 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4506 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4507 4508 /* 4509 * The old do_sys_rename had various consistency checks here 4510 * involving fvp and tvp. fvp is bogus already here, and tvp 4511 * will become bogus soon in any sensible file system, so the 4512 * only purpose in putting these checks here is to give lip 4513 * service to these screw cases and to acknowledge that they 4514 * exist, not actually to handle them, but here you go 4515 * anyway... 4516 */ 4517 4518 /* 4519 * Acknowledge that directories and non-directories aren't 4520 * suposed to mix. 4521 */ 4522 if (tvp != NULL) { 4523 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4524 error = ENOTDIR; 4525 goto abort3; 4526 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4527 error = EISDIR; 4528 goto abort3; 4529 } 4530 } 4531 4532 /* 4533 * Acknowledge some random screw case, among the dozens that 4534 * might arise. 4535 */ 4536 if (fvp == tdvp) { 4537 error = EINVAL; 4538 goto abort3; 4539 } 4540 4541 /* 4542 * Acknowledge that POSIX has a wacky screw case. 4543 * 4544 * XXX Eventually the retain flag needs to be passed on to 4545 * VOP_RENAME. 4546 */ 4547 if (fvp == tvp) { 4548 if (retain) { 4549 error = 0; 4550 goto abort3; 4551 } else if ((fdvp == tdvp) && 4552 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4553 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4554 fnd.ni_cnd.cn_namelen))) { 4555 error = 0; 4556 goto abort3; 4557 } 4558 } 4559 4560 /* 4561 * Make sure veriexec can screw us up. (But a race can screw 4562 * up veriexec, of course -- remember, fvp and (soon) tvp are 4563 * bogus.) 4564 */ 4565 #if NVERIEXEC > 0 4566 { 4567 char *f1, *f2; 4568 size_t f1_len; 4569 size_t f2_len; 4570 4571 f1_len = fnd.ni_cnd.cn_namelen + 1; 4572 f1 = kmem_alloc(f1_len, KM_SLEEP); 4573 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4574 4575 f2_len = tnd.ni_cnd.cn_namelen + 1; 4576 f2 = kmem_alloc(f2_len, KM_SLEEP); 4577 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4578 4579 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4580 4581 kmem_free(f1, f1_len); 4582 kmem_free(f2, f2_len); 4583 4584 if (error) 4585 goto abort3; 4586 } 4587 #endif /* NVERIEXEC > 0 */ 4588 4589 /* 4590 * All ready. Incant the rename vop. 4591 */ 4592 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4593 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4594 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4595 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4596 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4597 4598 /* 4599 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4600 * tdvp and tvp. But we can't assert any of that. 4601 */ 4602 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4603 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4604 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4605 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4606 4607 /* 4608 * So all we have left to do is to drop the rename lock and 4609 * destroy the pathbufs. 4610 */ 4611 VFS_RENAMELOCK_EXIT(mp); 4612 fstrans_done(mp); 4613 goto out2; 4614 4615 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4616 VOP_UNLOCK(tvp); 4617 abort2: VOP_UNLOCK(tdvp); 4618 VFS_RENAMELOCK_EXIT(mp); 4619 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4620 vrele(tdvp); 4621 if (tvp != NULL) 4622 vrele(tvp); 4623 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4624 vrele(fdvp); 4625 vrele(fvp); 4626 fstrans_done(mp); 4627 out2: pathbuf_destroy(tpb); 4628 out1: pathbuf_destroy(fpb); 4629 out0: return error; 4630 } 4631 4632 /* 4633 * Make a directory file. 4634 */ 4635 /* ARGSUSED */ 4636 int 4637 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4638 { 4639 /* { 4640 syscallarg(const char *) path; 4641 syscallarg(int) mode; 4642 } */ 4643 4644 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4645 SCARG(uap, mode), UIO_USERSPACE); 4646 } 4647 4648 int 4649 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4650 register_t *retval) 4651 { 4652 /* { 4653 syscallarg(int) fd; 4654 syscallarg(const char *) path; 4655 syscallarg(int) mode; 4656 } */ 4657 4658 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4659 SCARG(uap, mode), UIO_USERSPACE); 4660 } 4661 4662 4663 int 4664 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4665 { 4666 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4667 } 4668 4669 static int 4670 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4671 enum uio_seg seg) 4672 { 4673 struct proc *p = curlwp->l_proc; 4674 struct vnode *vp; 4675 struct vattr vattr; 4676 int error; 4677 struct pathbuf *pb; 4678 struct nameidata nd; 4679 4680 KASSERT(l != NULL || fdat == AT_FDCWD); 4681 4682 /* XXX bollocks, should pass in a pathbuf */ 4683 error = pathbuf_maybe_copyin(path, seg, &pb); 4684 if (error) { 4685 return error; 4686 } 4687 4688 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4689 4690 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4691 pathbuf_destroy(pb); 4692 return (error); 4693 } 4694 vp = nd.ni_vp; 4695 if (vp != NULL) { 4696 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4697 if (nd.ni_dvp == vp) 4698 vrele(nd.ni_dvp); 4699 else 4700 vput(nd.ni_dvp); 4701 vrele(vp); 4702 pathbuf_destroy(pb); 4703 return (EEXIST); 4704 } 4705 vattr_null(&vattr); 4706 vattr.va_type = VDIR; 4707 /* We will read cwdi->cwdi_cmask unlocked. */ 4708 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4709 nd.ni_cnd.cn_flags |= WILLBEDIR; 4710 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4711 if (!error) 4712 vrele(nd.ni_vp); 4713 vput(nd.ni_dvp); 4714 pathbuf_destroy(pb); 4715 return (error); 4716 } 4717 4718 /* 4719 * Remove a directory file. 4720 */ 4721 /* ARGSUSED */ 4722 int 4723 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4724 { 4725 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4726 AT_REMOVEDIR, UIO_USERSPACE); 4727 } 4728 4729 /* 4730 * Read a block of directory entries in a file system independent format. 4731 */ 4732 int 4733 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4734 { 4735 /* { 4736 syscallarg(int) fd; 4737 syscallarg(char *) buf; 4738 syscallarg(size_t) count; 4739 } */ 4740 file_t *fp; 4741 int error, done; 4742 4743 /* fd_getvnode() will use the descriptor for us */ 4744 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4745 return (error); 4746 if ((fp->f_flag & FREAD) == 0) { 4747 error = EBADF; 4748 goto out; 4749 } 4750 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4751 SCARG(uap, count), &done, l, 0, 0); 4752 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4753 *retval = done; 4754 out: 4755 fd_putfile(SCARG(uap, fd)); 4756 return (error); 4757 } 4758 4759 /* 4760 * Set the mode mask for creation of filesystem nodes. 4761 */ 4762 int 4763 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4764 { 4765 /* { 4766 syscallarg(mode_t) newmask; 4767 } */ 4768 4769 /* 4770 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4771 * serialization with those reads is required. It's important to 4772 * return a coherent answer for the caller of umask() though, and 4773 * the atomic operation accomplishes that. 4774 */ 4775 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4776 SCARG(uap, newmask) & ALLPERMS); 4777 4778 return (0); 4779 } 4780 4781 int 4782 dorevoke(struct vnode *vp, kauth_cred_t cred) 4783 { 4784 struct vattr vattr; 4785 int error, fs_decision; 4786 4787 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4788 error = VOP_GETATTR(vp, &vattr, cred); 4789 VOP_UNLOCK(vp); 4790 if (error != 0) 4791 return error; 4792 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4793 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4794 fs_decision); 4795 if (!error) 4796 VOP_REVOKE(vp, REVOKEALL); 4797 return (error); 4798 } 4799 4800 /* 4801 * Void all references to file by ripping underlying filesystem 4802 * away from vnode. 4803 */ 4804 /* ARGSUSED */ 4805 int 4806 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4807 { 4808 /* { 4809 syscallarg(const char *) path; 4810 } */ 4811 struct vnode *vp; 4812 int error; 4813 4814 error = namei_simple_user(SCARG(uap, path), 4815 NSM_FOLLOW_TRYEMULROOT, &vp); 4816 if (error != 0) 4817 return (error); 4818 error = dorevoke(vp, l->l_cred); 4819 vrele(vp); 4820 return (error); 4821 } 4822 4823 /* 4824 * Allocate backing store for a file, filling a hole without having to 4825 * explicitly write anything out. 4826 */ 4827 /* ARGSUSED */ 4828 int 4829 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4830 register_t *retval) 4831 { 4832 /* { 4833 syscallarg(int) fd; 4834 syscallarg(off_t) pos; 4835 syscallarg(off_t) len; 4836 } */ 4837 int fd; 4838 off_t pos, len; 4839 struct file *fp; 4840 struct vnode *vp; 4841 int error; 4842 4843 fd = SCARG(uap, fd); 4844 pos = SCARG(uap, pos); 4845 len = SCARG(uap, len); 4846 4847 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4848 *retval = EINVAL; 4849 return 0; 4850 } 4851 4852 error = fd_getvnode(fd, &fp); 4853 if (error) { 4854 *retval = error; 4855 return 0; 4856 } 4857 if ((fp->f_flag & FWRITE) == 0) { 4858 error = EBADF; 4859 goto fail; 4860 } 4861 vp = fp->f_vnode; 4862 4863 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4864 if (vp->v_type == VDIR) { 4865 error = EISDIR; 4866 } else { 4867 error = VOP_FALLOCATE(vp, pos, len); 4868 } 4869 VOP_UNLOCK(vp); 4870 4871 fail: 4872 fd_putfile(fd); 4873 *retval = error; 4874 return 0; 4875 } 4876 4877 /* 4878 * Deallocate backing store for a file, creating a hole. Also used for 4879 * invoking TRIM on disks. 4880 */ 4881 /* ARGSUSED */ 4882 int 4883 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4884 register_t *retval) 4885 { 4886 /* { 4887 syscallarg(int) fd; 4888 syscallarg(off_t) pos; 4889 syscallarg(off_t) len; 4890 } */ 4891 int fd; 4892 off_t pos, len; 4893 struct file *fp; 4894 struct vnode *vp; 4895 int error; 4896 4897 fd = SCARG(uap, fd); 4898 pos = SCARG(uap, pos); 4899 len = SCARG(uap, len); 4900 4901 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4902 return EINVAL; 4903 } 4904 4905 error = fd_getvnode(fd, &fp); 4906 if (error) { 4907 return error; 4908 } 4909 if ((fp->f_flag & FWRITE) == 0) { 4910 error = EBADF; 4911 goto fail; 4912 } 4913 vp = fp->f_vnode; 4914 4915 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4916 if (vp->v_type == VDIR) { 4917 error = EISDIR; 4918 } else { 4919 error = VOP_FDISCARD(vp, pos, len); 4920 } 4921 VOP_UNLOCK(vp); 4922 4923 fail: 4924 fd_putfile(fd); 4925 return error; 4926 } 4927