1 /* $NetBSD: vfs_syscalls.c,v 1.562 2024/06/29 13:31:07 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020, 2023 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.562 2024/06/29 13:31:07 christos Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 134 enum uio_seg); 135 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 136 enum uio_seg, int); 137 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 138 size_t, register_t *); 139 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 140 141 static int fd_nameiat(struct lwp *, int, struct nameidata *); 142 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 143 namei_simple_flags_t, struct vnode **); 144 145 /* 146 * This table is used to maintain compatibility with 4.3BSD 147 * and NetBSD 0.9 mount syscalls - and possibly other systems. 148 * Note, the order is important! 149 * 150 * Do not modify this table. It should only contain filesystems 151 * supported by NetBSD 0.9 and 4.3BSD. 152 */ 153 const char * const mountcompatnames[] = { 154 NULL, /* 0 = MOUNT_NONE */ 155 MOUNT_FFS, /* 1 = MOUNT_UFS */ 156 MOUNT_NFS, /* 2 */ 157 MOUNT_MFS, /* 3 */ 158 MOUNT_MSDOS, /* 4 */ 159 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 160 MOUNT_FDESC, /* 6 */ 161 MOUNT_KERNFS, /* 7 */ 162 NULL, /* 8 = MOUNT_DEVFS */ 163 MOUNT_AFS, /* 9 */ 164 }; 165 166 const u_int nmountcompatnames = __arraycount(mountcompatnames); 167 168 /* 169 * Filter event method for EVFILT_FS. 170 */ 171 static struct klist fs_klist; 172 static kmutex_t fs_klist_lock; 173 174 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 175 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 176 177 void 178 vfs_evfilt_fs_init(void) 179 { 180 klist_init(&fs_klist); 181 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE); 182 } 183 184 static int 185 filt_fsattach(struct knote *kn) 186 { 187 mutex_enter(&fs_klist_lock); 188 kn->kn_flags |= EV_CLEAR; 189 klist_insert(&fs_klist, kn); 190 mutex_exit(&fs_klist_lock); 191 192 return 0; 193 } 194 195 static void 196 filt_fsdetach(struct knote *kn) 197 { 198 mutex_enter(&fs_klist_lock); 199 klist_remove(&fs_klist, kn); 200 mutex_exit(&fs_klist_lock); 201 } 202 203 static int 204 filt_fs(struct knote *kn, long hint) 205 { 206 int rv; 207 208 if (hint & NOTE_SUBMIT) { 209 KASSERT(mutex_owned(&fs_klist_lock)); 210 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 211 } else { 212 mutex_enter(&fs_klist_lock); 213 } 214 215 rv = (kn->kn_fflags != 0); 216 217 if ((hint & NOTE_SUBMIT) == 0) { 218 mutex_exit(&fs_klist_lock); 219 } 220 221 return rv; 222 } 223 224 /* referenced in kern_event.c */ 225 const struct filterops fs_filtops = { 226 .f_flags = FILTEROP_MPSAFE, 227 .f_attach = filt_fsattach, 228 .f_detach = filt_fsdetach, 229 .f_event = filt_fs, 230 }; 231 232 static int 233 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 234 { 235 file_t *dfp; 236 int error; 237 const char *path = pathbuf_stringcopy_get(ndp->ni_pathbuf); 238 239 if (fdat != AT_FDCWD && path[0] != '/') { 240 if ((error = fd_getvnode(fdat, &dfp)) != 0) 241 goto out; 242 243 NDAT(ndp, dfp->f_vnode); 244 } 245 246 error = namei(ndp); 247 248 if (fdat != AT_FDCWD) 249 fd_putfile(fdat); 250 out: 251 pathbuf_stringcopy_put(ndp->ni_pathbuf, path); 252 return error; 253 } 254 255 static int 256 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 257 namei_simple_flags_t sflags, struct vnode **vp_ret) 258 { 259 file_t *dfp; 260 struct vnode *dvp; 261 int error; 262 263 if (fdat != AT_FDCWD && path[0] != '/') { 264 if ((error = fd_getvnode(fdat, &dfp)) != 0) 265 goto out; 266 267 dvp = dfp->f_vnode; 268 } else { 269 dvp = NULL; 270 } 271 272 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 273 274 if (fdat != AT_FDCWD) 275 fd_putfile(fdat); 276 out: 277 return error; 278 } 279 280 static int 281 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 282 { 283 int error; 284 285 fp->f_flag = flags & FMASK; 286 fp->f_type = DTYPE_VNODE; 287 fp->f_ops = &vnops; 288 fp->f_vnode = vp; 289 290 if (flags & (O_EXLOCK | O_SHLOCK)) { 291 struct flock lf; 292 int type; 293 294 lf.l_whence = SEEK_SET; 295 lf.l_start = 0; 296 lf.l_len = 0; 297 if (flags & O_EXLOCK) 298 lf.l_type = F_WRLCK; 299 else 300 lf.l_type = F_RDLCK; 301 type = F_FLOCK; 302 if ((flags & FNONBLOCK) == 0) 303 type |= F_WAIT; 304 VOP_UNLOCK(vp); 305 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 306 if (error) { 307 (void) vn_close(vp, fp->f_flag, fp->f_cred); 308 fd_abort(l->l_proc, fp, indx); 309 return error; 310 } 311 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 312 atomic_or_uint(&fp->f_flag, FHASLOCK); 313 } 314 if (flags & O_CLOEXEC) 315 fd_set_exclose(l, indx, true); 316 return 0; 317 } 318 319 static int 320 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 321 void *data, size_t *data_len) 322 { 323 struct mount *mp; 324 int error = 0, saved_flags; 325 326 mp = vp->v_mount; 327 saved_flags = mp->mnt_flag; 328 329 /* We can operate only on VV_ROOT nodes. */ 330 if ((vp->v_vflag & VV_ROOT) == 0) { 331 error = EINVAL; 332 goto out; 333 } 334 335 /* 336 * We only allow the filesystem to be reloaded if it 337 * is currently mounted read-only. Additionally, we 338 * prevent read-write to read-only downgrades. 339 */ 340 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 341 (mp->mnt_flag & MNT_RDONLY) == 0 && 342 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 343 error = EOPNOTSUPP; /* Needs translation */ 344 goto out; 345 } 346 347 /* 348 * Enabling MNT_UNION requires a covered mountpoint and 349 * must not happen on the root mount. 350 */ 351 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 352 error = EOPNOTSUPP; 353 goto out; 354 } 355 356 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 357 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 358 if (error) 359 goto out; 360 361 error = vfs_suspend(mp, 0); 362 if (error) 363 goto out; 364 365 mutex_enter(mp->mnt_updating); 366 367 mp->mnt_flag &= ~MNT_OP_FLAGS; 368 mp->mnt_flag |= flags & MNT_OP_FLAGS; 369 370 /* 371 * Set the mount level flags. 372 */ 373 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 374 if ((flags & MNT_RDONLY)) 375 mp->mnt_iflag |= IMNT_WANTRDONLY; 376 else 377 mp->mnt_iflag |= IMNT_WANTRDWR; 378 } 379 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 380 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 381 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 382 mp->mnt_flag &= ~MNT_RDONLY; 383 384 error = VFS_MOUNT(mp, path, data, data_len); 385 386 if (error && data != NULL) { 387 int error2; 388 389 /* 390 * Update failed; let's try and see if it was an 391 * export request. For compat with 3.0 and earlier. 392 */ 393 error2 = vfs_hooks_reexport(mp, path, data); 394 395 /* 396 * Only update error code if the export request was 397 * understood but some problem occurred while 398 * processing it. 399 */ 400 if (error2 != EJUSTRETURN) 401 error = error2; 402 } 403 404 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 405 mp->mnt_flag |= MNT_RDONLY; 406 if (error) 407 mp->mnt_flag = saved_flags; 408 mp->mnt_flag &= ~MNT_OP_FLAGS; 409 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 410 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 411 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 412 vfs_syncer_add_to_worklist(mp); 413 } else { 414 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 415 vfs_syncer_remove_from_worklist(mp); 416 } 417 mutex_exit(mp->mnt_updating); 418 vfs_resume(mp); 419 420 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 421 (flags & MNT_EXTATTR)) { 422 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 423 NULL, 0, NULL) != 0) { 424 printf("%s: failed to start extattr, error = %d", 425 mp->mnt_stat.f_mntonname, error); 426 mp->mnt_flag &= ~MNT_EXTATTR; 427 } 428 } 429 430 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 431 !(flags & MNT_EXTATTR)) { 432 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 433 NULL, 0, NULL) != 0) { 434 printf("%s: failed to stop extattr, error = %d", 435 mp->mnt_stat.f_mntonname, error); 436 mp->mnt_flag |= MNT_RDONLY; 437 } 438 } 439 out: 440 return (error); 441 } 442 443 static int 444 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 445 struct vfsops **vfsops) 446 { 447 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 448 int error; 449 450 if (type_seg == UIO_USERSPACE) { 451 /* Copy file-system type from userspace. */ 452 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 453 } else { 454 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 455 KASSERT(error == 0); 456 } 457 458 if (error) { 459 /* 460 * Historically, filesystem types were identified by numbers. 461 * If we get an integer for the filesystem type instead of a 462 * string, we check to see if it matches one of the historic 463 * filesystem types. 464 */ 465 u_long fsindex = (u_long)fstype; 466 if (fsindex >= nmountcompatnames || 467 mountcompatnames[fsindex] == NULL) 468 return ENODEV; 469 strlcpy(fstypename, mountcompatnames[fsindex], 470 sizeof(fstypename)); 471 } 472 473 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 474 if (strcmp(fstypename, "ufs") == 0) 475 fstypename[0] = 'f'; 476 477 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 478 return 0; 479 480 /* If we can autoload a vfs module, try again */ 481 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 482 483 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 484 return 0; 485 486 return ENODEV; 487 } 488 489 static int 490 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 491 void *data, size_t *data_len) 492 { 493 struct mount *mp; 494 int error; 495 496 /* If MNT_GETARGS is specified, it should be the only flag. */ 497 if (flags & ~MNT_GETARGS) 498 return EINVAL; 499 500 mp = vp->v_mount; 501 502 /* XXX: probably some notion of "can see" here if we want isolation. */ 503 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 504 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 505 if (error) 506 return error; 507 508 if ((vp->v_vflag & VV_ROOT) == 0) 509 return EINVAL; 510 511 if (vfs_busy(mp)) 512 return EPERM; 513 514 mutex_enter(mp->mnt_updating); 515 mp->mnt_flag &= ~MNT_OP_FLAGS; 516 mp->mnt_flag |= MNT_GETARGS; 517 error = VFS_MOUNT(mp, path, data, data_len); 518 mp->mnt_flag &= ~MNT_OP_FLAGS; 519 mutex_exit(mp->mnt_updating); 520 521 vfs_unbusy(mp); 522 return (error); 523 } 524 525 int 526 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 527 { 528 /* { 529 syscallarg(const char *) type; 530 syscallarg(const char *) path; 531 syscallarg(int) flags; 532 syscallarg(void *) data; 533 syscallarg(size_t) data_len; 534 } */ 535 536 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 537 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 538 SCARG(uap, data_len), retval); 539 } 540 541 int 542 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 543 const char *path, int flags, void *data, enum uio_seg data_seg, 544 size_t data_len, register_t *retval) 545 { 546 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 547 struct vnode *vp; 548 void *data_buf = data; 549 bool vfsopsrele = false; 550 size_t alloc_sz = 0; 551 int error; 552 553 /* 554 * Get vnode to be covered 555 */ 556 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 557 if (error != 0) { 558 vp = NULL; 559 goto done; 560 } 561 562 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 563 vfsops = vp->v_mount->mnt_op; 564 } else { 565 /* 'type' is userspace */ 566 error = mount_get_vfsops(type, type_seg, &vfsops); 567 if (error != 0) 568 goto done; 569 vfsopsrele = true; 570 } 571 572 /* 573 * We allow data to be NULL, even for userspace. Some fs's don't need 574 * it. The others will handle NULL. 575 */ 576 if (data != NULL && data_seg == UIO_USERSPACE) { 577 if (data_len == 0) { 578 /* No length supplied, use default for filesystem */ 579 data_len = vfsops->vfs_min_mount_data; 580 581 /* 582 * Hopefully a longer buffer won't make copyin() fail. 583 * For compatibility with 3.0 and earlier. 584 */ 585 if (flags & MNT_UPDATE 586 && data_len < sizeof (struct mnt_export_args30)) 587 data_len = sizeof (struct mnt_export_args30); 588 } 589 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 590 error = EINVAL; 591 goto done; 592 } 593 alloc_sz = data_len; 594 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 595 596 /* NFS needs the buffer even for mnt_getargs .... */ 597 error = copyin(data, data_buf, data_len); 598 if (error != 0) 599 goto done; 600 } 601 602 if (flags & MNT_GETARGS) { 603 if (data_len == 0) { 604 error = EINVAL; 605 goto done; 606 } 607 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 608 if (error != 0) 609 goto done; 610 if (data_seg == UIO_USERSPACE) 611 error = copyout(data_buf, data, data_len); 612 *retval = data_len; 613 } else if (flags & MNT_UPDATE) { 614 error = mount_update(l, vp, path, flags, data_buf, &data_len); 615 } else { 616 /* Locking is handled internally in mount_domount(). */ 617 KASSERT(vfsopsrele == true); 618 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 619 &data_len); 620 vfsopsrele = false; 621 } 622 if (!error) { 623 mutex_enter(&fs_klist_lock); 624 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 625 mutex_exit(&fs_klist_lock); 626 } 627 628 done: 629 if (vfsopsrele) 630 vfs_delref(vfsops); 631 if (vp != NULL) { 632 vrele(vp); 633 } 634 if (data_buf != data) 635 kmem_free(data_buf, alloc_sz); 636 return (error); 637 } 638 639 /* 640 * Unmount a file system. 641 * 642 * Note: unmount takes a path to the vnode mounted on as argument, 643 * not special file (as before). 644 */ 645 /* ARGSUSED */ 646 int 647 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 648 { 649 /* { 650 syscallarg(const char *) path; 651 syscallarg(int) flags; 652 } */ 653 struct vnode *vp; 654 struct mount *mp; 655 int error; 656 struct pathbuf *pb; 657 struct nameidata nd; 658 659 error = pathbuf_copyin(SCARG(uap, path), &pb); 660 if (error) { 661 return error; 662 } 663 664 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 665 if ((error = namei(&nd)) != 0) { 666 pathbuf_destroy(pb); 667 return error; 668 } 669 vp = nd.ni_vp; 670 pathbuf_destroy(pb); 671 672 mp = vp->v_mount; 673 vfs_ref(mp); 674 VOP_UNLOCK(vp); 675 676 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 677 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 678 if (error) { 679 vrele(vp); 680 vfs_rele(mp); 681 return (error); 682 } 683 684 /* 685 * Don't allow unmounting the root file system. 686 */ 687 if (mp->mnt_flag & MNT_ROOTFS) { 688 vrele(vp); 689 vfs_rele(mp); 690 return (EINVAL); 691 } 692 693 /* 694 * Must be the root of the filesystem 695 */ 696 if ((vp->v_vflag & VV_ROOT) == 0) { 697 vrele(vp); 698 vfs_rele(mp); 699 return (EINVAL); 700 } 701 702 vrele(vp); 703 error = dounmount(mp, SCARG(uap, flags), l); 704 vfs_rele(mp); 705 if (!error) { 706 mutex_enter(&fs_klist_lock); 707 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 708 mutex_exit(&fs_klist_lock); 709 } 710 return error; 711 } 712 713 /* 714 * Sync each mounted filesystem. 715 */ 716 #ifdef DEBUG 717 int syncprt = 0; 718 struct ctldebug debug0 = { "syncprt", &syncprt }; 719 #endif 720 721 void 722 do_sys_sync(struct lwp *l) 723 { 724 mount_iterator_t *iter; 725 struct mount *mp; 726 int asyncflag; 727 728 mountlist_iterator_init(&iter); 729 while ((mp = mountlist_iterator_next(iter)) != NULL) { 730 mutex_enter(mp->mnt_updating); 731 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 732 asyncflag = mp->mnt_flag & MNT_ASYNC; 733 mp->mnt_flag &= ~MNT_ASYNC; 734 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 735 if (asyncflag) 736 mp->mnt_flag |= MNT_ASYNC; 737 } 738 mutex_exit(mp->mnt_updating); 739 } 740 mountlist_iterator_destroy(iter); 741 #ifdef DEBUG 742 if (syncprt) 743 vfs_bufstats(); 744 #endif /* DEBUG */ 745 } 746 747 static bool 748 sync_vnode_filter(void *cookie, vnode_t *vp) 749 { 750 751 if (vp->v_numoutput > 0) { 752 ++*(int *)cookie; 753 } 754 return false; 755 } 756 757 int 758 vfs_syncwait(void) 759 { 760 int nbusy, nbusy_prev, iter; 761 struct vnode_iterator *vniter; 762 mount_iterator_t *mpiter; 763 struct mount *mp; 764 765 for (nbusy_prev = 0, iter = 0; iter < 20;) { 766 nbusy = 0; 767 mountlist_iterator_init(&mpiter); 768 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 769 vnode_t *vp __diagused; 770 vfs_vnode_iterator_init(mp, &vniter); 771 vp = vfs_vnode_iterator_next(vniter, 772 sync_vnode_filter, &nbusy); 773 KASSERT(vp == NULL); 774 vfs_vnode_iterator_destroy(vniter); 775 } 776 mountlist_iterator_destroy(mpiter); 777 778 if (nbusy == 0) 779 break; 780 if (nbusy_prev == 0) 781 nbusy_prev = nbusy; 782 printf("%d ", nbusy); 783 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 784 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 785 iter++; 786 else 787 nbusy_prev = nbusy; 788 } 789 790 if (nbusy) { 791 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 792 printf("giving up\nPrinting vnodes for busy buffers\n"); 793 mountlist_iterator_init(&mpiter); 794 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 795 vnode_t *vp; 796 vfs_vnode_iterator_init(mp, &vniter); 797 vp = vfs_vnode_iterator_next(vniter, 798 NULL, NULL); 799 mutex_enter(vp->v_interlock); 800 if (vp->v_numoutput > 0) 801 vprint(NULL, vp); 802 mutex_exit(vp->v_interlock); 803 vrele(vp); 804 vfs_vnode_iterator_destroy(vniter); 805 } 806 mountlist_iterator_destroy(mpiter); 807 #endif 808 } 809 810 return nbusy; 811 } 812 813 /* ARGSUSED */ 814 int 815 sys_sync(struct lwp *l, const void *v, register_t *retval) 816 { 817 do_sys_sync(l); 818 return (0); 819 } 820 821 822 /* 823 * Access or change filesystem quotas. 824 * 825 * (this is really 14 different calls bundled into one) 826 */ 827 828 static int 829 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 830 { 831 struct quotastat info_k; 832 int error; 833 834 /* ensure any padding bytes are cleared */ 835 memset(&info_k, 0, sizeof(info_k)); 836 837 error = vfs_quotactl_stat(mp, &info_k); 838 if (error) { 839 return error; 840 } 841 842 return copyout(&info_k, info_u, sizeof(info_k)); 843 } 844 845 static int 846 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 847 struct quotaidtypestat *info_u) 848 { 849 struct quotaidtypestat info_k; 850 int error; 851 852 /* ensure any padding bytes are cleared */ 853 memset(&info_k, 0, sizeof(info_k)); 854 855 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 856 if (error) { 857 return error; 858 } 859 860 return copyout(&info_k, info_u, sizeof(info_k)); 861 } 862 863 static int 864 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 865 struct quotaobjtypestat *info_u) 866 { 867 struct quotaobjtypestat info_k; 868 int error; 869 870 /* ensure any padding bytes are cleared */ 871 memset(&info_k, 0, sizeof(info_k)); 872 873 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 874 if (error) { 875 return error; 876 } 877 878 return copyout(&info_k, info_u, sizeof(info_k)); 879 } 880 881 static int 882 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 883 struct quotaval *val_u) 884 { 885 struct quotakey key_k; 886 struct quotaval val_k; 887 int error; 888 889 /* ensure any padding bytes are cleared */ 890 memset(&val_k, 0, sizeof(val_k)); 891 892 error = copyin(key_u, &key_k, sizeof(key_k)); 893 if (error) { 894 return error; 895 } 896 897 error = vfs_quotactl_get(mp, &key_k, &val_k); 898 if (error) { 899 return error; 900 } 901 902 return copyout(&val_k, val_u, sizeof(val_k)); 903 } 904 905 static int 906 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 907 const struct quotaval *val_u) 908 { 909 struct quotakey key_k; 910 struct quotaval val_k; 911 int error; 912 913 error = copyin(key_u, &key_k, sizeof(key_k)); 914 if (error) { 915 return error; 916 } 917 918 error = copyin(val_u, &val_k, sizeof(val_k)); 919 if (error) { 920 return error; 921 } 922 923 return vfs_quotactl_put(mp, &key_k, &val_k); 924 } 925 926 static int 927 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 928 { 929 struct quotakey key_k; 930 int error; 931 932 error = copyin(key_u, &key_k, sizeof(key_k)); 933 if (error) { 934 return error; 935 } 936 937 return vfs_quotactl_del(mp, &key_k); 938 } 939 940 static int 941 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 942 { 943 struct quotakcursor cursor_k; 944 int error; 945 946 /* ensure any padding bytes are cleared */ 947 memset(&cursor_k, 0, sizeof(cursor_k)); 948 949 error = vfs_quotactl_cursoropen(mp, &cursor_k); 950 if (error) { 951 return error; 952 } 953 954 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 955 } 956 957 static int 958 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 959 { 960 struct quotakcursor cursor_k; 961 int error; 962 963 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 964 if (error) { 965 return error; 966 } 967 968 return vfs_quotactl_cursorclose(mp, &cursor_k); 969 } 970 971 static int 972 do_sys_quotactl_cursorskipidtype(struct mount *mp, 973 struct quotakcursor *cursor_u, int idtype) 974 { 975 struct quotakcursor cursor_k; 976 int error; 977 978 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 979 if (error) { 980 return error; 981 } 982 983 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 984 if (error) { 985 return error; 986 } 987 988 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 989 } 990 991 static int 992 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 993 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 994 unsigned *ret_u) 995 { 996 #define CGET_STACK_MAX 8 997 struct quotakcursor cursor_k; 998 struct quotakey stackkeys[CGET_STACK_MAX]; 999 struct quotaval stackvals[CGET_STACK_MAX]; 1000 struct quotakey *keys_k; 1001 struct quotaval *vals_k; 1002 unsigned ret_k; 1003 int error; 1004 1005 if (maxnum > 128) { 1006 maxnum = 128; 1007 } 1008 1009 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1010 if (error) { 1011 return error; 1012 } 1013 1014 if (maxnum <= CGET_STACK_MAX) { 1015 keys_k = stackkeys; 1016 vals_k = stackvals; 1017 /* ensure any padding bytes are cleared */ 1018 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1019 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1020 } else { 1021 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1022 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1023 } 1024 1025 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1026 &ret_k); 1027 if (error) { 1028 goto fail; 1029 } 1030 1031 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1032 if (error) { 1033 goto fail; 1034 } 1035 1036 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1037 if (error) { 1038 goto fail; 1039 } 1040 1041 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1042 if (error) { 1043 goto fail; 1044 } 1045 1046 /* do last to maximize the chance of being able to recover a failure */ 1047 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1048 1049 fail: 1050 if (keys_k != stackkeys) { 1051 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1052 } 1053 if (vals_k != stackvals) { 1054 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1055 } 1056 return error; 1057 } 1058 1059 static int 1060 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1061 int *ret_u) 1062 { 1063 struct quotakcursor cursor_k; 1064 int ret_k; 1065 int error; 1066 1067 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1068 if (error) { 1069 return error; 1070 } 1071 1072 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1073 if (error) { 1074 return error; 1075 } 1076 1077 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1078 if (error) { 1079 return error; 1080 } 1081 1082 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1083 } 1084 1085 static int 1086 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1087 { 1088 struct quotakcursor cursor_k; 1089 int error; 1090 1091 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1092 if (error) { 1093 return error; 1094 } 1095 1096 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1097 if (error) { 1098 return error; 1099 } 1100 1101 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1102 } 1103 1104 static int 1105 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1106 { 1107 char *path_k; 1108 int error; 1109 1110 /* XXX this should probably be a struct pathbuf */ 1111 path_k = PNBUF_GET(); 1112 error = copyin(path_u, path_k, PATH_MAX); 1113 if (error) { 1114 PNBUF_PUT(path_k); 1115 return error; 1116 } 1117 1118 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1119 1120 PNBUF_PUT(path_k); 1121 return error; 1122 } 1123 1124 static int 1125 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1126 { 1127 return vfs_quotactl_quotaoff(mp, idtype); 1128 } 1129 1130 int 1131 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1132 { 1133 struct mount *mp; 1134 struct vnode *vp; 1135 int error; 1136 1137 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1138 if (error != 0) 1139 return (error); 1140 mp = vp->v_mount; 1141 1142 switch (args->qc_op) { 1143 case QUOTACTL_STAT: 1144 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1145 break; 1146 case QUOTACTL_IDTYPESTAT: 1147 error = do_sys_quotactl_idtypestat(mp, 1148 args->u.idtypestat.qc_idtype, 1149 args->u.idtypestat.qc_info); 1150 break; 1151 case QUOTACTL_OBJTYPESTAT: 1152 error = do_sys_quotactl_objtypestat(mp, 1153 args->u.objtypestat.qc_objtype, 1154 args->u.objtypestat.qc_info); 1155 break; 1156 case QUOTACTL_GET: 1157 error = do_sys_quotactl_get(mp, 1158 args->u.get.qc_key, 1159 args->u.get.qc_val); 1160 break; 1161 case QUOTACTL_PUT: 1162 error = do_sys_quotactl_put(mp, 1163 args->u.put.qc_key, 1164 args->u.put.qc_val); 1165 break; 1166 case QUOTACTL_DEL: 1167 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1168 break; 1169 case QUOTACTL_CURSOROPEN: 1170 error = do_sys_quotactl_cursoropen(mp, 1171 args->u.cursoropen.qc_cursor); 1172 break; 1173 case QUOTACTL_CURSORCLOSE: 1174 error = do_sys_quotactl_cursorclose(mp, 1175 args->u.cursorclose.qc_cursor); 1176 break; 1177 case QUOTACTL_CURSORSKIPIDTYPE: 1178 error = do_sys_quotactl_cursorskipidtype(mp, 1179 args->u.cursorskipidtype.qc_cursor, 1180 args->u.cursorskipidtype.qc_idtype); 1181 break; 1182 case QUOTACTL_CURSORGET: 1183 error = do_sys_quotactl_cursorget(mp, 1184 args->u.cursorget.qc_cursor, 1185 args->u.cursorget.qc_keys, 1186 args->u.cursorget.qc_vals, 1187 args->u.cursorget.qc_maxnum, 1188 args->u.cursorget.qc_ret); 1189 break; 1190 case QUOTACTL_CURSORATEND: 1191 error = do_sys_quotactl_cursoratend(mp, 1192 args->u.cursoratend.qc_cursor, 1193 args->u.cursoratend.qc_ret); 1194 break; 1195 case QUOTACTL_CURSORREWIND: 1196 error = do_sys_quotactl_cursorrewind(mp, 1197 args->u.cursorrewind.qc_cursor); 1198 break; 1199 case QUOTACTL_QUOTAON: 1200 error = do_sys_quotactl_quotaon(mp, 1201 args->u.quotaon.qc_idtype, 1202 args->u.quotaon.qc_quotafile); 1203 break; 1204 case QUOTACTL_QUOTAOFF: 1205 error = do_sys_quotactl_quotaoff(mp, 1206 args->u.quotaoff.qc_idtype); 1207 break; 1208 default: 1209 error = EINVAL; 1210 break; 1211 } 1212 1213 vrele(vp); 1214 return error; 1215 } 1216 1217 /* ARGSUSED */ 1218 int 1219 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1220 register_t *retval) 1221 { 1222 /* { 1223 syscallarg(const char *) path; 1224 syscallarg(struct quotactl_args *) args; 1225 } */ 1226 struct quotactl_args args; 1227 int error; 1228 1229 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1230 if (error) { 1231 return error; 1232 } 1233 1234 return do_sys_quotactl(SCARG(uap, path), &args); 1235 } 1236 1237 int 1238 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1239 int root) 1240 { 1241 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1242 bool chrooted; 1243 int error = 0; 1244 1245 KASSERT(l == curlwp); 1246 1247 /* 1248 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1249 * since it would imply chroots can be escaped. Just make sure this 1250 * routine is self-consistent. 1251 */ 1252 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1253 1254 /* 1255 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1256 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1257 * overrides MNT_NOWAIT. 1258 */ 1259 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1260 (flags != MNT_WAIT && flags != 0)) { 1261 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1262 } else { 1263 /* Get the filesystem stats now */ 1264 memset(sp, 0, sizeof(*sp)); 1265 if ((error = VFS_STATVFS(mp, sp)) != 0) 1266 return error; 1267 if (!chrooted) 1268 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1269 } 1270 1271 if (chrooted) { 1272 size_t len; 1273 char *bp; 1274 char c; 1275 char *path = PNBUF_GET(); 1276 1277 bp = path + MAXPATHLEN; 1278 *--bp = '\0'; 1279 rw_enter(&cwdi->cwdi_lock, RW_READER); 1280 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1281 MAXPATHLEN / 2, 0, l); 1282 rw_exit(&cwdi->cwdi_lock); 1283 if (error) { 1284 PNBUF_PUT(path); 1285 return error; 1286 } 1287 len = strlen(bp); 1288 if (len != 1) { 1289 /* 1290 * for mount points that are below our root, we can see 1291 * them, so we fix up the pathname and return them. The 1292 * rest we cannot see, so we don't allow viewing the 1293 * data. 1294 */ 1295 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1296 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1297 (void)strlcpy(sp->f_mntonname, 1298 c == '\0' ? "/" : &sp->f_mntonname[len], 1299 sizeof(sp->f_mntonname)); 1300 } else { 1301 if (root) 1302 (void)strlcpy(sp->f_mntonname, "/", 1303 sizeof(sp->f_mntonname)); 1304 else 1305 error = EPERM; 1306 } 1307 } 1308 PNBUF_PUT(path); 1309 } 1310 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1311 return error; 1312 } 1313 1314 /* 1315 * Get filesystem statistics by path. 1316 */ 1317 int 1318 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1319 { 1320 struct mount *mp; 1321 int error; 1322 struct vnode *vp; 1323 1324 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1325 if (error != 0) 1326 return error; 1327 mp = vp->v_mount; 1328 error = dostatvfs(mp, sb, l, flags, 1); 1329 vrele(vp); 1330 return error; 1331 } 1332 1333 /* ARGSUSED */ 1334 int 1335 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1336 { 1337 /* { 1338 syscallarg(const char *) path; 1339 syscallarg(struct statvfs *) buf; 1340 syscallarg(int) flags; 1341 } */ 1342 struct statvfs *sb; 1343 int error; 1344 1345 sb = STATVFSBUF_GET(); 1346 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1347 if (error == 0) 1348 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1349 STATVFSBUF_PUT(sb); 1350 return error; 1351 } 1352 1353 /* 1354 * Get filesystem statistics by fd. 1355 */ 1356 int 1357 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1358 { 1359 file_t *fp; 1360 struct mount *mp; 1361 int error; 1362 1363 /* fd_getvnode() will use the descriptor for us */ 1364 if ((error = fd_getvnode(fd, &fp)) != 0) 1365 return (error); 1366 mp = fp->f_vnode->v_mount; 1367 error = dostatvfs(mp, sb, curlwp, flags, 1); 1368 fd_putfile(fd); 1369 return error; 1370 } 1371 1372 /* ARGSUSED */ 1373 int 1374 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1375 { 1376 /* { 1377 syscallarg(int) fd; 1378 syscallarg(struct statvfs *) buf; 1379 syscallarg(int) flags; 1380 } */ 1381 struct statvfs *sb; 1382 int error; 1383 1384 sb = STATVFSBUF_GET(); 1385 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1386 if (error == 0) 1387 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1388 STATVFSBUF_PUT(sb); 1389 return error; 1390 } 1391 1392 1393 /* 1394 * Get statistics on all filesystems. 1395 */ 1396 int 1397 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1398 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1399 register_t *retval) 1400 { 1401 int root = 0; 1402 mount_iterator_t *iter; 1403 struct proc *p = l->l_proc; 1404 struct mount *mp; 1405 struct statvfs *sb; 1406 size_t count, maxcount; 1407 int error = 0; 1408 1409 sb = STATVFSBUF_GET(); 1410 maxcount = bufsize / entry_sz; 1411 count = 0; 1412 mountlist_iterator_init(&iter); 1413 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1414 if (sfsp && count < maxcount) { 1415 error = dostatvfs(mp, sb, l, flags, 0); 1416 if (error) { 1417 error = 0; 1418 continue; 1419 } 1420 error = copyfn(sb, sfsp, entry_sz); 1421 if (error) 1422 goto out; 1423 sfsp = (char *)sfsp + entry_sz; 1424 root |= strcmp(sb->f_mntonname, "/") == 0; 1425 } 1426 count++; 1427 } 1428 1429 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1430 /* 1431 * fake a root entry 1432 */ 1433 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1434 sb, l, flags, 1); 1435 if (error != 0) 1436 goto out; 1437 if (sfsp) { 1438 error = copyfn(sb, sfsp, entry_sz); 1439 if (error != 0) 1440 goto out; 1441 } 1442 count++; 1443 } 1444 if (sfsp && count > maxcount) 1445 *retval = maxcount; 1446 else 1447 *retval = count; 1448 out: 1449 mountlist_iterator_destroy(iter); 1450 STATVFSBUF_PUT(sb); 1451 return error; 1452 } 1453 1454 int 1455 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1456 register_t *retval) 1457 { 1458 /* { 1459 syscallarg(struct statvfs *) buf; 1460 syscallarg(size_t) bufsize; 1461 syscallarg(int) flags; 1462 } */ 1463 1464 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1465 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1466 } 1467 1468 /* 1469 * Change current working directory to a given file descriptor. 1470 */ 1471 int 1472 do_sys_fchdir(struct lwp *l, int fd, register_t *retval) 1473 { 1474 struct proc *p = l->l_proc; 1475 struct cwdinfo *cwdi; 1476 struct vnode *vp, *tdp; 1477 struct mount *mp; 1478 file_t *fp; 1479 int error; 1480 1481 /* fd_getvnode() will use the descriptor for us */ 1482 if ((error = fd_getvnode(fd, &fp)) != 0) 1483 return error; 1484 vp = fp->f_vnode; 1485 1486 vref(vp); 1487 vn_lock(vp, LK_SHARED | LK_RETRY); 1488 if (vp->v_type != VDIR) 1489 error = ENOTDIR; 1490 else 1491 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1492 if (error) { 1493 vput(vp); 1494 goto out; 1495 } 1496 while ((mp = vp->v_mountedhere) != NULL) { 1497 error = vfs_busy(mp); 1498 vput(vp); 1499 if (error != 0) 1500 goto out; 1501 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1502 vfs_unbusy(mp); 1503 if (error) 1504 goto out; 1505 vp = tdp; 1506 } 1507 VOP_UNLOCK(vp); 1508 1509 /* 1510 * Disallow changing to a directory not under the process's 1511 * current root directory (if there is one). 1512 */ 1513 cwdi = p->p_cwdi; 1514 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1515 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1516 vrele(vp); 1517 error = EPERM; /* operation not permitted */ 1518 } else { 1519 vrele(cwdi->cwdi_cdir); 1520 cwdi->cwdi_cdir = vp; 1521 } 1522 rw_exit(&cwdi->cwdi_lock); 1523 1524 out: 1525 fd_putfile(fd); 1526 return error; 1527 } 1528 1529 /* 1530 * Change current working directory to a given file descriptor. 1531 */ 1532 /* ARGSUSED */ 1533 int 1534 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1535 { 1536 /* { 1537 syscallarg(int) fd; 1538 } */ 1539 return do_sys_fchdir(l, SCARG(uap, fd), retval); 1540 } 1541 1542 /* 1543 * Change this process's notion of the root directory to a given file 1544 * descriptor. 1545 */ 1546 int 1547 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1548 { 1549 struct vnode *vp; 1550 file_t *fp; 1551 int error, fd = SCARG(uap, fd); 1552 1553 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1554 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1555 return error; 1556 /* fd_getvnode() will use the descriptor for us */ 1557 if ((error = fd_getvnode(fd, &fp)) != 0) 1558 return error; 1559 vp = fp->f_vnode; 1560 vn_lock(vp, LK_SHARED | LK_RETRY); 1561 if (vp->v_type != VDIR) 1562 error = ENOTDIR; 1563 else 1564 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1565 VOP_UNLOCK(vp); 1566 if (error) 1567 goto out; 1568 vref(vp); 1569 change_root(vp); 1570 1571 out: 1572 fd_putfile(fd); 1573 return (error); 1574 } 1575 1576 /* 1577 * Change current working directory (``.''). 1578 */ 1579 int 1580 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg, 1581 register_t *retval) 1582 { 1583 struct proc *p = l->l_proc; 1584 struct cwdinfo * cwdi; 1585 int error; 1586 struct vnode *vp; 1587 1588 if ((error = chdir_lookup(path, seg, &vp, l)) != 0) 1589 return error; 1590 cwdi = p->p_cwdi; 1591 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1592 vrele(cwdi->cwdi_cdir); 1593 cwdi->cwdi_cdir = vp; 1594 rw_exit(&cwdi->cwdi_lock); 1595 return 0; 1596 } 1597 1598 /* 1599 * Change current working directory (``.''). 1600 */ 1601 /* ARGSUSED */ 1602 int 1603 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1604 { 1605 /* { 1606 syscallarg(const char *) path; 1607 } */ 1608 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval); 1609 } 1610 1611 /* 1612 * Change notion of root (``/'') directory. 1613 */ 1614 /* ARGSUSED */ 1615 int 1616 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1617 { 1618 /* { 1619 syscallarg(const char *) path; 1620 } */ 1621 int error; 1622 struct vnode *vp; 1623 1624 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1625 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1626 return (error); 1627 1628 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1629 if (error == 0) 1630 change_root(vp); 1631 return error; 1632 } 1633 1634 /* 1635 * Common routine for chroot and fchroot. 1636 * NB: callers need to properly authorize the change root operation. 1637 */ 1638 void 1639 change_root(struct vnode *vp) 1640 { 1641 kauth_cred_t ncred; 1642 struct lwp *l = curlwp; 1643 struct proc *p = l->l_proc; 1644 struct cwdinfo *cwdi = p->p_cwdi; 1645 1646 ncred = kauth_cred_alloc(); 1647 1648 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1649 if (cwdi->cwdi_rdir != NULL) 1650 vrele(cwdi->cwdi_rdir); 1651 cwdi->cwdi_rdir = vp; 1652 1653 /* 1654 * Prevent escaping from chroot by putting the root under 1655 * the working directory. Silently chdir to / if we aren't 1656 * already there. 1657 */ 1658 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1659 /* 1660 * XXX would be more failsafe to change directory to a 1661 * deadfs node here instead 1662 */ 1663 vrele(cwdi->cwdi_cdir); 1664 vref(vp); 1665 cwdi->cwdi_cdir = vp; 1666 } 1667 rw_exit(&cwdi->cwdi_lock); 1668 1669 /* Get a write lock on the process credential. */ 1670 proc_crmod_enter(); 1671 1672 kauth_cred_clone(p->p_cred, ncred); 1673 kauth_proc_chroot(ncred, p->p_cwdi); 1674 1675 /* Broadcast our credentials to the process and other LWPs. */ 1676 proc_crmod_leave(ncred, p->p_cred, true); 1677 } 1678 1679 /* 1680 * Common routine for chroot and chdir. 1681 * XXX "where" should be enum uio_seg 1682 */ 1683 int 1684 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1685 { 1686 struct pathbuf *pb; 1687 struct nameidata nd; 1688 int error; 1689 1690 error = pathbuf_maybe_copyin(path, where, &pb); 1691 if (error) { 1692 return error; 1693 } 1694 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1695 if ((error = namei(&nd)) != 0) { 1696 pathbuf_destroy(pb); 1697 return error; 1698 } 1699 *vpp = nd.ni_vp; 1700 pathbuf_destroy(pb); 1701 1702 if ((*vpp)->v_type != VDIR) 1703 error = ENOTDIR; 1704 else 1705 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1706 1707 if (error) 1708 vput(*vpp); 1709 else 1710 VOP_UNLOCK(*vpp); 1711 return (error); 1712 } 1713 1714 /* 1715 * Internals of sys_open - path has already been converted into a pathbuf 1716 * (so we can easily reuse this function from other parts of the kernel, 1717 * like posix_spawn post-processing). 1718 */ 1719 int 1720 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1721 int open_mode, int *fd) 1722 { 1723 struct proc *p = l->l_proc; 1724 struct cwdinfo *cwdi = p->p_cwdi; 1725 file_t *fp; 1726 struct vnode *vp; 1727 int dupfd; 1728 bool dupfd_move; 1729 int flags, cmode; 1730 int indx, error; 1731 1732 if (open_flags & O_SEARCH) { 1733 open_flags &= ~(int)O_SEARCH; 1734 } 1735 1736 /* 1737 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1738 * may be specified. 1739 */ 1740 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1741 return EINVAL; 1742 1743 flags = FFLAGS(open_flags); 1744 if ((flags & (FREAD | FWRITE)) == 0) 1745 return EINVAL; 1746 1747 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1748 return error; 1749 } 1750 1751 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1752 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1753 1754 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1755 &vp, &dupfd_move, &dupfd); 1756 if (error != 0) { 1757 fd_abort(p, fp, indx); 1758 return error; 1759 } 1760 1761 if (vp == NULL) { 1762 fd_abort(p, fp, indx); 1763 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1764 if (error) 1765 return error; 1766 *fd = indx; 1767 } else { 1768 error = open_setfp(l, fp, vp, indx, flags); 1769 if (error) 1770 return error; 1771 VOP_UNLOCK(vp); 1772 *fd = indx; 1773 fd_affix(p, fp, indx); 1774 } 1775 1776 return 0; 1777 } 1778 1779 int 1780 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1781 { 1782 struct pathbuf *pb; 1783 int error, oflags; 1784 1785 oflags = FFLAGS(open_flags); 1786 if ((oflags & (FREAD | FWRITE)) == 0) 1787 return EINVAL; 1788 1789 pb = pathbuf_create(path); 1790 if (pb == NULL) 1791 return ENOMEM; 1792 1793 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1794 pathbuf_destroy(pb); 1795 1796 return error; 1797 } 1798 1799 static int 1800 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1801 int mode, int *fd) 1802 { 1803 file_t *dfp = NULL; 1804 struct vnode *dvp = NULL; 1805 struct pathbuf *pb; 1806 const char *pathstring = NULL; 1807 int error; 1808 1809 if (path == NULL) { 1810 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1811 if (error == ENOSYS) 1812 goto no_compat; 1813 if (error) 1814 return error; 1815 } else { 1816 no_compat: 1817 error = pathbuf_copyin(path, &pb); 1818 if (error) 1819 return error; 1820 } 1821 1822 pathstring = pathbuf_stringcopy_get(pb); 1823 1824 /* 1825 * fdat is ignored if: 1826 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1827 * 2) if path is absolute, then fdat is useless. 1828 */ 1829 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1830 /* fd_getvnode() will use the descriptor for us */ 1831 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1832 goto out; 1833 1834 dvp = dfp->f_vnode; 1835 } 1836 1837 error = do_open(l, dvp, pb, flags, mode, fd); 1838 1839 if (dfp != NULL) 1840 fd_putfile(fdat); 1841 out: 1842 pathbuf_stringcopy_put(pb, pathstring); 1843 pathbuf_destroy(pb); 1844 return error; 1845 } 1846 1847 int 1848 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1849 { 1850 /* { 1851 syscallarg(const char *) path; 1852 syscallarg(int) flags; 1853 syscallarg(int) mode; 1854 } */ 1855 int error; 1856 int fd; 1857 1858 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1859 SCARG(uap, flags), SCARG(uap, mode), &fd); 1860 1861 if (error == 0) 1862 *retval = fd; 1863 1864 return error; 1865 } 1866 1867 int 1868 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1869 { 1870 /* { 1871 syscallarg(int) fd; 1872 syscallarg(const char *) path; 1873 syscallarg(int) oflags; 1874 syscallarg(int) mode; 1875 } */ 1876 int error; 1877 int fd; 1878 1879 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1880 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1881 1882 if (error == 0) 1883 *retval = fd; 1884 1885 return error; 1886 } 1887 1888 static void 1889 vfs__fhfree(fhandle_t *fhp) 1890 { 1891 size_t fhsize; 1892 1893 fhsize = FHANDLE_SIZE(fhp); 1894 kmem_free(fhp, fhsize); 1895 } 1896 1897 /* 1898 * vfs_composefh: compose a filehandle. 1899 */ 1900 1901 int 1902 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1903 { 1904 struct mount *mp; 1905 struct fid *fidp; 1906 int error; 1907 size_t needfhsize; 1908 size_t fidsize; 1909 1910 mp = vp->v_mount; 1911 fidp = NULL; 1912 if (*fh_size < FHANDLE_SIZE_MIN) { 1913 fidsize = 0; 1914 } else { 1915 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1916 if (fhp != NULL) { 1917 memset(fhp, 0, *fh_size); 1918 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1919 fidp = &fhp->fh_fid; 1920 } 1921 } 1922 error = VFS_VPTOFH(vp, fidp, &fidsize); 1923 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1924 if (error == 0 && *fh_size < needfhsize) { 1925 error = E2BIG; 1926 } 1927 *fh_size = needfhsize; 1928 return error; 1929 } 1930 1931 int 1932 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1933 { 1934 struct mount *mp; 1935 fhandle_t *fhp; 1936 size_t fhsize; 1937 size_t fidsize; 1938 int error; 1939 1940 mp = vp->v_mount; 1941 fidsize = 0; 1942 error = VFS_VPTOFH(vp, NULL, &fidsize); 1943 KASSERT(error != 0); 1944 if (error != E2BIG) { 1945 goto out; 1946 } 1947 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1948 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1949 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1950 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1951 if (error == 0) { 1952 KASSERT(FHANDLE_SIZE(fhp) == fhsize); 1953 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize); 1954 *fhpp = fhp; 1955 } else { 1956 kmem_free(fhp, fhsize); 1957 } 1958 out: 1959 return error; 1960 } 1961 1962 void 1963 vfs_composefh_free(fhandle_t *fhp) 1964 { 1965 1966 vfs__fhfree(fhp); 1967 } 1968 1969 /* 1970 * vfs_fhtovp: lookup a vnode by a filehandle. 1971 */ 1972 1973 int 1974 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1975 { 1976 struct mount *mp; 1977 int error; 1978 1979 *vpp = NULL; 1980 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1981 if (mp == NULL) { 1982 error = ESTALE; 1983 goto out; 1984 } 1985 if (mp->mnt_op->vfs_fhtovp == NULL) { 1986 error = EOPNOTSUPP; 1987 goto out; 1988 } 1989 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 1990 out: 1991 return error; 1992 } 1993 1994 /* 1995 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1996 * the needed size. 1997 */ 1998 1999 int 2000 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 2001 { 2002 fhandle_t *fhp; 2003 int error; 2004 2005 if (fhsize > FHANDLE_SIZE_MAX) { 2006 return EINVAL; 2007 } 2008 if (fhsize < FHANDLE_SIZE_MIN) { 2009 return EINVAL; 2010 } 2011 again: 2012 fhp = kmem_alloc(fhsize, KM_SLEEP); 2013 error = copyin(ufhp, fhp, fhsize); 2014 if (error == 0) { 2015 /* XXX this check shouldn't be here */ 2016 if (FHANDLE_SIZE(fhp) == fhsize) { 2017 *fhpp = fhp; 2018 return 0; 2019 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 2020 /* 2021 * a kludge for nfsv2 padded handles. 2022 */ 2023 size_t sz; 2024 2025 sz = FHANDLE_SIZE(fhp); 2026 kmem_free(fhp, fhsize); 2027 fhsize = sz; 2028 goto again; 2029 } else { 2030 /* 2031 * userland told us wrong size. 2032 */ 2033 error = EINVAL; 2034 } 2035 } 2036 kmem_free(fhp, fhsize); 2037 return error; 2038 } 2039 2040 void 2041 vfs_copyinfh_free(fhandle_t *fhp) 2042 { 2043 2044 vfs__fhfree(fhp); 2045 } 2046 2047 /* 2048 * Get file handle system call 2049 */ 2050 int 2051 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 2052 { 2053 /* { 2054 syscallarg(char *) fname; 2055 syscallarg(fhandle_t *) fhp; 2056 syscallarg(size_t *) fh_size; 2057 } */ 2058 struct vnode *vp; 2059 fhandle_t *fh; 2060 int error; 2061 struct pathbuf *pb; 2062 struct nameidata nd; 2063 size_t sz; 2064 size_t usz; 2065 2066 /* 2067 * Must be super user 2068 */ 2069 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2070 0, NULL, NULL, NULL); 2071 if (error) 2072 return (error); 2073 2074 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2075 if (error) { 2076 return error; 2077 } 2078 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2079 error = namei(&nd); 2080 if (error) { 2081 pathbuf_destroy(pb); 2082 return error; 2083 } 2084 vp = nd.ni_vp; 2085 pathbuf_destroy(pb); 2086 2087 error = vfs_composefh_alloc(vp, &fh); 2088 vput(vp); 2089 if (error != 0) { 2090 return error; 2091 } 2092 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2093 if (error != 0) { 2094 goto out; 2095 } 2096 sz = FHANDLE_SIZE(fh); 2097 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2098 if (error != 0) { 2099 goto out; 2100 } 2101 if (usz >= sz) { 2102 error = copyout(fh, SCARG(uap, fhp), sz); 2103 } else { 2104 error = E2BIG; 2105 } 2106 out: 2107 vfs_composefh_free(fh); 2108 return (error); 2109 } 2110 2111 /* 2112 * Open a file given a file handle. 2113 * 2114 * Check permissions, allocate an open file structure, 2115 * and call the device open routine if any. 2116 */ 2117 2118 int 2119 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2120 register_t *retval) 2121 { 2122 file_t *fp; 2123 struct vnode *vp = NULL; 2124 kauth_cred_t cred = l->l_cred; 2125 file_t *nfp; 2126 int indx, error; 2127 struct vattr va; 2128 fhandle_t *fh; 2129 int flags; 2130 proc_t *p; 2131 2132 p = curproc; 2133 2134 /* 2135 * Must be super user 2136 */ 2137 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2138 0, NULL, NULL, NULL))) 2139 return (error); 2140 2141 if (oflags & O_SEARCH) { 2142 oflags &= ~(int)O_SEARCH; 2143 } 2144 2145 flags = FFLAGS(oflags); 2146 if ((flags & (FREAD | FWRITE)) == 0) 2147 return (EINVAL); 2148 if ((flags & O_CREAT)) 2149 return (EINVAL); 2150 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2151 return (error); 2152 fp = nfp; 2153 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2154 if (error != 0) { 2155 goto bad; 2156 } 2157 error = vfs_fhtovp(fh, &vp); 2158 vfs_copyinfh_free(fh); 2159 if (error != 0) { 2160 goto bad; 2161 } 2162 2163 /* Now do an effective vn_open */ 2164 2165 if (vp->v_type == VSOCK) { 2166 error = EOPNOTSUPP; 2167 goto bad; 2168 } 2169 error = vn_openchk(vp, cred, flags); 2170 if (error != 0) 2171 goto bad; 2172 if (flags & O_TRUNC) { 2173 VOP_UNLOCK(vp); /* XXX */ 2174 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2175 vattr_null(&va); 2176 va.va_size = 0; 2177 error = VOP_SETATTR(vp, &va, cred); 2178 if (error) 2179 goto bad; 2180 } 2181 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2182 goto bad; 2183 if (flags & FWRITE) { 2184 mutex_enter(vp->v_interlock); 2185 vp->v_writecount++; 2186 mutex_exit(vp->v_interlock); 2187 } 2188 2189 /* done with modified vn_open, now finish what sys_open does. */ 2190 if ((error = open_setfp(l, fp, vp, indx, flags))) 2191 return error; 2192 2193 VOP_UNLOCK(vp); 2194 *retval = indx; 2195 fd_affix(p, fp, indx); 2196 return (0); 2197 2198 bad: 2199 fd_abort(p, fp, indx); 2200 if (vp != NULL) 2201 vput(vp); 2202 if (error == EDUPFD || error == EMOVEFD) { 2203 /* XXX should probably close curlwp->l_dupfd */ 2204 error = EOPNOTSUPP; 2205 } 2206 return (error); 2207 } 2208 2209 int 2210 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2211 { 2212 /* { 2213 syscallarg(const void *) fhp; 2214 syscallarg(size_t) fh_size; 2215 syscallarg(int) flags; 2216 } */ 2217 2218 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2219 SCARG(uap, flags), retval); 2220 } 2221 2222 int 2223 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2224 { 2225 int error; 2226 fhandle_t *fh; 2227 struct vnode *vp; 2228 2229 /* 2230 * Must be super user 2231 */ 2232 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2233 0, NULL, NULL, NULL))) 2234 return (error); 2235 2236 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2237 if (error != 0) 2238 return error; 2239 2240 error = vfs_fhtovp(fh, &vp); 2241 vfs_copyinfh_free(fh); 2242 if (error != 0) 2243 return error; 2244 2245 error = vn_stat(vp, sb); 2246 vput(vp); 2247 return error; 2248 } 2249 2250 2251 /* ARGSUSED */ 2252 int 2253 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2254 { 2255 /* { 2256 syscallarg(const void *) fhp; 2257 syscallarg(size_t) fh_size; 2258 syscallarg(struct stat *) sb; 2259 } */ 2260 struct stat sb; 2261 int error; 2262 2263 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2264 if (error) 2265 return error; 2266 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2267 } 2268 2269 int 2270 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2271 int flags) 2272 { 2273 fhandle_t *fh; 2274 struct mount *mp; 2275 struct vnode *vp; 2276 int error; 2277 2278 /* 2279 * Must be super user 2280 */ 2281 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2282 0, NULL, NULL, NULL))) 2283 return error; 2284 2285 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2286 if (error != 0) 2287 return error; 2288 2289 error = vfs_fhtovp(fh, &vp); 2290 vfs_copyinfh_free(fh); 2291 if (error != 0) 2292 return error; 2293 2294 mp = vp->v_mount; 2295 error = dostatvfs(mp, sb, l, flags, 1); 2296 vput(vp); 2297 return error; 2298 } 2299 2300 /* ARGSUSED */ 2301 int 2302 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2303 { 2304 /* { 2305 syscallarg(const void *) fhp; 2306 syscallarg(size_t) fh_size; 2307 syscallarg(struct statvfs *) buf; 2308 syscallarg(int) flags; 2309 } */ 2310 struct statvfs *sb = STATVFSBUF_GET(); 2311 int error; 2312 2313 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2314 SCARG(uap, flags)); 2315 if (error == 0) 2316 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2317 STATVFSBUF_PUT(sb); 2318 return error; 2319 } 2320 2321 int 2322 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2323 dev_t dev) 2324 { 2325 2326 /* 2327 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2328 * in mode and dev=0. 2329 * 2330 * In all the other cases it's implementation defined behavior. 2331 */ 2332 2333 if ((mode & S_IFIFO) && dev == 0) 2334 return do_sys_mkfifoat(l, fdat, pathname, mode); 2335 else 2336 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2337 UIO_USERSPACE); 2338 } 2339 2340 /* 2341 * Create a special file. 2342 */ 2343 /* ARGSUSED */ 2344 int 2345 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2346 register_t *retval) 2347 { 2348 /* { 2349 syscallarg(const char *) path; 2350 syscallarg(mode_t) mode; 2351 syscallarg(dev_t) dev; 2352 } */ 2353 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2354 SCARG(uap, mode), SCARG(uap, dev)); 2355 } 2356 2357 int 2358 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2359 register_t *retval) 2360 { 2361 /* { 2362 syscallarg(int) fd; 2363 syscallarg(const char *) path; 2364 syscallarg(mode_t) mode; 2365 syscallarg(int) pad; 2366 syscallarg(dev_t) dev; 2367 } */ 2368 2369 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2370 SCARG(uap, mode), SCARG(uap, dev)); 2371 } 2372 2373 int 2374 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2375 enum uio_seg seg) 2376 { 2377 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2378 } 2379 2380 int 2381 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2382 dev_t dev, enum uio_seg seg) 2383 { 2384 struct proc *p = l->l_proc; 2385 struct vnode *vp; 2386 struct vattr vattr; 2387 int error, optype; 2388 struct pathbuf *pb; 2389 struct nameidata nd; 2390 const char *pathstring; 2391 2392 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2393 0, NULL, NULL, NULL)) != 0) 2394 return (error); 2395 2396 optype = VOP_MKNOD_DESCOFFSET; 2397 2398 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2399 if (error) { 2400 return error; 2401 } 2402 pathstring = pathbuf_stringcopy_get(pb); 2403 if (pathstring == NULL) { 2404 pathbuf_destroy(pb); 2405 return ENOMEM; 2406 } 2407 2408 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2409 2410 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2411 goto out; 2412 vp = nd.ni_vp; 2413 2414 if (vp != NULL) 2415 error = EEXIST; 2416 else { 2417 vattr_null(&vattr); 2418 /* We will read cwdi->cwdi_cmask unlocked. */ 2419 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2420 vattr.va_rdev = dev; 2421 2422 switch (mode & S_IFMT) { 2423 case S_IFMT: /* used by badsect to flag bad sectors */ 2424 vattr.va_type = VBAD; 2425 break; 2426 case S_IFCHR: 2427 vattr.va_type = VCHR; 2428 break; 2429 case S_IFBLK: 2430 vattr.va_type = VBLK; 2431 break; 2432 case S_IFWHT: 2433 optype = VOP_WHITEOUT_DESCOFFSET; 2434 break; 2435 case S_IFREG: 2436 #if NVERIEXEC > 0 2437 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2438 O_CREAT); 2439 #endif /* NVERIEXEC > 0 */ 2440 vattr.va_type = VREG; 2441 vattr.va_rdev = VNOVAL; 2442 optype = VOP_CREATE_DESCOFFSET; 2443 break; 2444 default: 2445 error = EINVAL; 2446 break; 2447 } 2448 2449 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2450 vattr.va_rdev == VNOVAL) 2451 error = EINVAL; 2452 } 2453 2454 if (!error) { 2455 switch (optype) { 2456 case VOP_WHITEOUT_DESCOFFSET: 2457 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2458 if (error) 2459 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2460 vput(nd.ni_dvp); 2461 break; 2462 2463 case VOP_MKNOD_DESCOFFSET: 2464 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2465 &nd.ni_cnd, &vattr); 2466 if (error == 0) 2467 vrele(nd.ni_vp); 2468 vput(nd.ni_dvp); 2469 break; 2470 2471 case VOP_CREATE_DESCOFFSET: 2472 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2473 &nd.ni_cnd, &vattr); 2474 if (error == 0) 2475 vrele(nd.ni_vp); 2476 vput(nd.ni_dvp); 2477 break; 2478 } 2479 } else { 2480 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2481 if (nd.ni_dvp == vp) 2482 vrele(nd.ni_dvp); 2483 else 2484 vput(nd.ni_dvp); 2485 if (vp) 2486 vrele(vp); 2487 } 2488 out: 2489 pathbuf_stringcopy_put(pb, pathstring); 2490 pathbuf_destroy(pb); 2491 return (error); 2492 } 2493 2494 /* 2495 * Create a named pipe. 2496 */ 2497 /* ARGSUSED */ 2498 int 2499 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2500 { 2501 /* { 2502 syscallarg(const char *) path; 2503 syscallarg(int) mode; 2504 } */ 2505 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2506 } 2507 2508 int 2509 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2510 register_t *retval) 2511 { 2512 /* { 2513 syscallarg(int) fd; 2514 syscallarg(const char *) path; 2515 syscallarg(int) mode; 2516 } */ 2517 2518 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2519 SCARG(uap, mode)); 2520 } 2521 2522 static int 2523 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2524 { 2525 struct proc *p = l->l_proc; 2526 struct vattr vattr; 2527 int error; 2528 struct pathbuf *pb; 2529 struct nameidata nd; 2530 2531 error = pathbuf_copyin(path, &pb); 2532 if (error) { 2533 return error; 2534 } 2535 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2536 2537 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2538 pathbuf_destroy(pb); 2539 return error; 2540 } 2541 if (nd.ni_vp != NULL) { 2542 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2543 if (nd.ni_dvp == nd.ni_vp) 2544 vrele(nd.ni_dvp); 2545 else 2546 vput(nd.ni_dvp); 2547 vrele(nd.ni_vp); 2548 pathbuf_destroy(pb); 2549 return (EEXIST); 2550 } 2551 vattr_null(&vattr); 2552 vattr.va_type = VFIFO; 2553 /* We will read cwdi->cwdi_cmask unlocked. */ 2554 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2555 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2556 if (error == 0) 2557 vrele(nd.ni_vp); 2558 vput(nd.ni_dvp); 2559 pathbuf_destroy(pb); 2560 return (error); 2561 } 2562 2563 /* 2564 * Make a hard file link. 2565 */ 2566 /* ARGSUSED */ 2567 int 2568 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2569 const char *link, int follow, register_t *retval) 2570 { 2571 struct vnode *vp; 2572 struct pathbuf *linkpb; 2573 struct nameidata nd; 2574 namei_simple_flags_t ns_flags; 2575 int error; 2576 2577 if (follow & AT_SYMLINK_FOLLOW) 2578 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2579 else 2580 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2581 2582 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2583 if (error != 0) 2584 return (error); 2585 error = pathbuf_copyin(link, &linkpb); 2586 if (error) { 2587 goto out1; 2588 } 2589 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2590 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2591 goto out2; 2592 if (nd.ni_vp) { 2593 error = EEXIST; 2594 goto abortop; 2595 } 2596 /* Prevent hard links on directories. */ 2597 if (vp->v_type == VDIR) { 2598 error = EPERM; 2599 goto abortop; 2600 } 2601 /* Prevent cross-mount operation. */ 2602 if (nd.ni_dvp->v_mount != vp->v_mount) { 2603 error = EXDEV; 2604 goto abortop; 2605 } 2606 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2607 VOP_UNLOCK(nd.ni_dvp); 2608 vrele(nd.ni_dvp); 2609 out2: 2610 pathbuf_destroy(linkpb); 2611 out1: 2612 vrele(vp); 2613 return (error); 2614 abortop: 2615 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2616 if (nd.ni_dvp == nd.ni_vp) 2617 vrele(nd.ni_dvp); 2618 else 2619 vput(nd.ni_dvp); 2620 if (nd.ni_vp != NULL) 2621 vrele(nd.ni_vp); 2622 goto out2; 2623 } 2624 2625 int 2626 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2627 { 2628 /* { 2629 syscallarg(const char *) path; 2630 syscallarg(const char *) link; 2631 } */ 2632 const char *path = SCARG(uap, path); 2633 const char *link = SCARG(uap, link); 2634 2635 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2636 AT_SYMLINK_FOLLOW, retval); 2637 } 2638 2639 int 2640 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2641 register_t *retval) 2642 { 2643 /* { 2644 syscallarg(int) fd1; 2645 syscallarg(const char *) name1; 2646 syscallarg(int) fd2; 2647 syscallarg(const char *) name2; 2648 syscallarg(int) flags; 2649 } */ 2650 int fd1 = SCARG(uap, fd1); 2651 const char *name1 = SCARG(uap, name1); 2652 int fd2 = SCARG(uap, fd2); 2653 const char *name2 = SCARG(uap, name2); 2654 int follow; 2655 2656 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2657 2658 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2659 } 2660 2661 2662 int 2663 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2664 { 2665 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2666 } 2667 2668 static int 2669 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2670 const char *link, enum uio_seg seg) 2671 { 2672 struct proc *p = curproc; 2673 struct vattr vattr; 2674 char *path; 2675 int error; 2676 size_t len; 2677 struct pathbuf *linkpb; 2678 struct nameidata nd; 2679 2680 KASSERT(l != NULL || fdat == AT_FDCWD); 2681 2682 path = PNBUF_GET(); 2683 if (seg == UIO_USERSPACE) { 2684 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2685 goto out1; 2686 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2687 goto out1; 2688 } else { 2689 len = strlen(patharg) + 1; 2690 KASSERT(len <= MAXPATHLEN); 2691 memcpy(path, patharg, len); 2692 linkpb = pathbuf_create(link); 2693 if (linkpb == NULL) { 2694 error = ENOMEM; 2695 goto out1; 2696 } 2697 } 2698 ktrkuser("symlink-target", path, len - 1); 2699 2700 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2701 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2702 goto out2; 2703 if (nd.ni_vp) { 2704 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2705 if (nd.ni_dvp == nd.ni_vp) 2706 vrele(nd.ni_dvp); 2707 else 2708 vput(nd.ni_dvp); 2709 vrele(nd.ni_vp); 2710 error = EEXIST; 2711 goto out2; 2712 } 2713 vattr_null(&vattr); 2714 vattr.va_type = VLNK; 2715 /* We will read cwdi->cwdi_cmask unlocked. */ 2716 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2717 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2718 if (error == 0) 2719 vrele(nd.ni_vp); 2720 vput(nd.ni_dvp); 2721 out2: 2722 pathbuf_destroy(linkpb); 2723 out1: 2724 PNBUF_PUT(path); 2725 return (error); 2726 } 2727 2728 /* 2729 * Make a symbolic link. 2730 */ 2731 /* ARGSUSED */ 2732 int 2733 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2734 { 2735 /* { 2736 syscallarg(const char *) path; 2737 syscallarg(const char *) link; 2738 } */ 2739 2740 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2741 UIO_USERSPACE); 2742 } 2743 2744 int 2745 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2746 register_t *retval) 2747 { 2748 /* { 2749 syscallarg(const char *) path1; 2750 syscallarg(int) fd; 2751 syscallarg(const char *) path2; 2752 } */ 2753 2754 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2755 SCARG(uap, path2), UIO_USERSPACE); 2756 } 2757 2758 /* 2759 * Delete a whiteout from the filesystem. 2760 */ 2761 /* ARGSUSED */ 2762 int 2763 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2764 { 2765 /* { 2766 syscallarg(const char *) path; 2767 } */ 2768 int error; 2769 struct pathbuf *pb; 2770 struct nameidata nd; 2771 2772 error = pathbuf_copyin(SCARG(uap, path), &pb); 2773 if (error) { 2774 return error; 2775 } 2776 2777 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2778 error = namei(&nd); 2779 if (error) { 2780 pathbuf_destroy(pb); 2781 return (error); 2782 } 2783 2784 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2785 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2786 if (nd.ni_dvp == nd.ni_vp) 2787 vrele(nd.ni_dvp); 2788 else 2789 vput(nd.ni_dvp); 2790 if (nd.ni_vp) 2791 vrele(nd.ni_vp); 2792 pathbuf_destroy(pb); 2793 return (EEXIST); 2794 } 2795 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2796 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2797 vput(nd.ni_dvp); 2798 pathbuf_destroy(pb); 2799 return (error); 2800 } 2801 2802 /* 2803 * Delete a name from the filesystem. 2804 */ 2805 /* ARGSUSED */ 2806 int 2807 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2808 { 2809 /* { 2810 syscallarg(const char *) path; 2811 } */ 2812 2813 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2814 } 2815 2816 int 2817 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2818 register_t *retval) 2819 { 2820 /* { 2821 syscallarg(int) fd; 2822 syscallarg(const char *) path; 2823 syscallarg(int) flag; 2824 } */ 2825 2826 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2827 SCARG(uap, flag), UIO_USERSPACE); 2828 } 2829 2830 int 2831 do_sys_unlink(const char *arg, enum uio_seg seg) 2832 { 2833 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2834 } 2835 2836 static int 2837 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2838 enum uio_seg seg) 2839 { 2840 struct vnode *vp; 2841 int error; 2842 struct pathbuf *pb; 2843 struct nameidata nd; 2844 const char *pathstring; 2845 2846 KASSERT(l != NULL || fdat == AT_FDCWD); 2847 2848 error = pathbuf_maybe_copyin(arg, seg, &pb); 2849 if (error) { 2850 return error; 2851 } 2852 pathstring = pathbuf_stringcopy_get(pb); 2853 if (pathstring == NULL) { 2854 pathbuf_destroy(pb); 2855 return ENOMEM; 2856 } 2857 2858 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2859 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2860 goto out; 2861 vp = nd.ni_vp; 2862 2863 /* 2864 * The root of a mounted filesystem cannot be deleted. 2865 */ 2866 if ((vp->v_vflag & VV_ROOT) != 0) { 2867 error = EBUSY; 2868 goto abort; 2869 } 2870 2871 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2872 error = EBUSY; 2873 goto abort; 2874 } 2875 2876 /* 2877 * No rmdir "." please. 2878 */ 2879 if (nd.ni_dvp == vp) { 2880 error = EINVAL; 2881 goto abort; 2882 } 2883 2884 /* 2885 * AT_REMOVEDIR is required to remove a directory 2886 */ 2887 if (vp->v_type == VDIR) { 2888 if (!(flags & AT_REMOVEDIR)) { 2889 error = EPERM; 2890 goto abort; 2891 } else { 2892 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2893 vput(nd.ni_dvp); 2894 goto out; 2895 } 2896 } 2897 2898 /* 2899 * Starting here we only deal with non directories. 2900 */ 2901 if (flags & AT_REMOVEDIR) { 2902 error = ENOTDIR; 2903 goto abort; 2904 } 2905 2906 #if NVERIEXEC > 0 2907 /* Handle remove requests for veriexec entries. */ 2908 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2909 goto abort; 2910 } 2911 #endif /* NVERIEXEC > 0 */ 2912 2913 #ifdef FILEASSOC 2914 (void)fileassoc_file_delete(vp); 2915 #endif /* FILEASSOC */ 2916 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2917 vput(nd.ni_dvp); 2918 goto out; 2919 2920 abort: 2921 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2922 if (nd.ni_dvp == vp) 2923 vrele(nd.ni_dvp); 2924 else 2925 vput(nd.ni_dvp); 2926 vput(vp); 2927 2928 out: 2929 pathbuf_stringcopy_put(pb, pathstring); 2930 pathbuf_destroy(pb); 2931 return (error); 2932 } 2933 2934 /* 2935 * Reposition read/write file offset. 2936 */ 2937 int 2938 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2939 { 2940 /* { 2941 syscallarg(int) fd; 2942 syscallarg(int) pad; 2943 syscallarg(off_t) offset; 2944 syscallarg(int) whence; 2945 } */ 2946 file_t *fp; 2947 int error, fd; 2948 2949 switch (SCARG(uap, whence)) { 2950 case SEEK_CUR: 2951 case SEEK_END: 2952 case SEEK_SET: 2953 break; 2954 default: 2955 return EINVAL; 2956 } 2957 2958 fd = SCARG(uap, fd); 2959 2960 if ((fp = fd_getfile(fd)) == NULL) 2961 return (EBADF); 2962 2963 if (fp->f_ops->fo_seek == NULL) { 2964 error = ESPIPE; 2965 goto out; 2966 } 2967 2968 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 2969 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 2970 out: 2971 fd_putfile(fd); 2972 return (error); 2973 } 2974 2975 /* 2976 * Positional read system call. 2977 */ 2978 int 2979 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2980 { 2981 /* { 2982 syscallarg(int) fd; 2983 syscallarg(void *) buf; 2984 syscallarg(size_t) nbyte; 2985 syscallarg(off_t) offset; 2986 } */ 2987 file_t *fp; 2988 off_t offset; 2989 int error, fd = SCARG(uap, fd); 2990 2991 if ((fp = fd_getfile(fd)) == NULL) 2992 return (EBADF); 2993 2994 if ((fp->f_flag & FREAD) == 0) { 2995 fd_putfile(fd); 2996 return (EBADF); 2997 } 2998 2999 if (fp->f_ops->fo_seek == NULL) { 3000 error = ESPIPE; 3001 goto out; 3002 } 3003 3004 offset = SCARG(uap, offset); 3005 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3006 if (error) 3007 goto out; 3008 3009 /* dofileread() will unuse the descriptor for us */ 3010 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3011 &offset, 0, retval)); 3012 3013 out: 3014 fd_putfile(fd); 3015 return (error); 3016 } 3017 3018 /* 3019 * Positional scatter read system call. 3020 */ 3021 int 3022 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 3023 { 3024 /* { 3025 syscallarg(int) fd; 3026 syscallarg(const struct iovec *) iovp; 3027 syscallarg(int) iovcnt; 3028 syscallarg(off_t) offset; 3029 } */ 3030 off_t offset = SCARG(uap, offset); 3031 3032 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3033 SCARG(uap, iovcnt), &offset, 0, retval); 3034 } 3035 3036 /* 3037 * Positional write system call. 3038 */ 3039 int 3040 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 3041 { 3042 /* { 3043 syscallarg(int) fd; 3044 syscallarg(const void *) buf; 3045 syscallarg(size_t) nbyte; 3046 syscallarg(off_t) offset; 3047 } */ 3048 file_t *fp; 3049 off_t offset; 3050 int error, fd = SCARG(uap, fd); 3051 3052 if ((fp = fd_getfile(fd)) == NULL) 3053 return (EBADF); 3054 3055 if ((fp->f_flag & FWRITE) == 0) { 3056 fd_putfile(fd); 3057 return (EBADF); 3058 } 3059 3060 if (fp->f_ops->fo_seek == NULL) { 3061 error = ESPIPE; 3062 goto out; 3063 } 3064 3065 offset = SCARG(uap, offset); 3066 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3067 if (error) 3068 goto out; 3069 3070 /* dofilewrite() will unuse the descriptor for us */ 3071 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3072 &offset, 0, retval)); 3073 3074 out: 3075 fd_putfile(fd); 3076 return (error); 3077 } 3078 3079 /* 3080 * Positional gather write system call. 3081 */ 3082 int 3083 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 3084 { 3085 /* { 3086 syscallarg(int) fd; 3087 syscallarg(const struct iovec *) iovp; 3088 syscallarg(int) iovcnt; 3089 syscallarg(off_t) offset; 3090 } */ 3091 off_t offset = SCARG(uap, offset); 3092 3093 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3094 SCARG(uap, iovcnt), &offset, 0, retval); 3095 } 3096 3097 /* 3098 * Check access permissions. 3099 */ 3100 int 3101 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 3102 { 3103 /* { 3104 syscallarg(const char *) path; 3105 syscallarg(int) flags; 3106 } */ 3107 3108 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3109 SCARG(uap, flags), 0); 3110 } 3111 3112 int 3113 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3114 int mode, int flags) 3115 { 3116 kauth_cred_t cred; 3117 struct vnode *vp; 3118 int error, nd_flag, vmode; 3119 struct pathbuf *pb; 3120 struct nameidata nd; 3121 3122 CTASSERT(F_OK == 0); 3123 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3124 /* nonsense mode */ 3125 return EINVAL; 3126 } 3127 3128 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3129 if (flags & AT_SYMLINK_NOFOLLOW) 3130 nd_flag &= ~FOLLOW; 3131 3132 error = pathbuf_copyin(path, &pb); 3133 if (error) 3134 return error; 3135 3136 NDINIT(&nd, LOOKUP, nd_flag, pb); 3137 3138 /* Override default credentials */ 3139 if (!(flags & AT_EACCESS)) { 3140 cred = kauth_cred_dup(l->l_cred); 3141 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3142 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3143 } else 3144 cred = l->l_cred; 3145 nd.ni_cnd.cn_cred = cred; 3146 3147 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3148 pathbuf_destroy(pb); 3149 goto out; 3150 } 3151 vp = nd.ni_vp; 3152 pathbuf_destroy(pb); 3153 3154 /* Flags == 0 means only check for existence. */ 3155 if (mode) { 3156 vmode = 0; 3157 if (mode & R_OK) 3158 vmode |= VREAD; 3159 if (mode & W_OK) 3160 vmode |= VWRITE; 3161 if (mode & X_OK) 3162 vmode |= VEXEC; 3163 3164 error = VOP_ACCESS(vp, vmode, cred); 3165 if (!error && (vmode & VWRITE)) 3166 error = vn_writechk(vp); 3167 } 3168 vput(vp); 3169 out: 3170 if (!(flags & AT_EACCESS)) 3171 kauth_cred_free(cred); 3172 return (error); 3173 } 3174 3175 int 3176 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3177 register_t *retval) 3178 { 3179 /* { 3180 syscallarg(int) fd; 3181 syscallarg(const char *) path; 3182 syscallarg(int) amode; 3183 syscallarg(int) flag; 3184 } */ 3185 3186 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3187 SCARG(uap, amode), SCARG(uap, flag)); 3188 } 3189 3190 /* 3191 * Common code for all sys_stat functions, including compat versions. 3192 */ 3193 int 3194 do_sys_stat(const char *userpath, unsigned int nd_flag, 3195 struct stat *sb) 3196 { 3197 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3198 } 3199 3200 int 3201 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3202 unsigned int nd_flag, struct stat *sb) 3203 { 3204 int error; 3205 struct pathbuf *pb; 3206 struct nameidata nd; 3207 3208 KASSERT(l != NULL || fdat == AT_FDCWD); 3209 3210 error = pathbuf_copyin(userpath, &pb); 3211 if (error) { 3212 return error; 3213 } 3214 3215 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3216 3217 error = fd_nameiat(l, fdat, &nd); 3218 if (error != 0) { 3219 pathbuf_destroy(pb); 3220 return error; 3221 } 3222 error = vn_stat(nd.ni_vp, sb); 3223 vput(nd.ni_vp); 3224 pathbuf_destroy(pb); 3225 return error; 3226 } 3227 3228 /* 3229 * Get file status; this version follows links. 3230 */ 3231 /* ARGSUSED */ 3232 int 3233 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3234 { 3235 /* { 3236 syscallarg(const char *) path; 3237 syscallarg(struct stat *) ub; 3238 } */ 3239 struct stat sb; 3240 int error; 3241 3242 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3243 if (error) 3244 return error; 3245 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3246 } 3247 3248 /* 3249 * Get file status; this version does not follow links. 3250 */ 3251 /* ARGSUSED */ 3252 int 3253 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3254 { 3255 /* { 3256 syscallarg(const char *) path; 3257 syscallarg(struct stat *) ub; 3258 } */ 3259 struct stat sb; 3260 int error; 3261 3262 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3263 if (error) 3264 return error; 3265 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3266 } 3267 3268 int 3269 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3270 register_t *retval) 3271 { 3272 /* { 3273 syscallarg(int) fd; 3274 syscallarg(const char *) path; 3275 syscallarg(struct stat *) buf; 3276 syscallarg(int) flag; 3277 } */ 3278 unsigned int nd_flag; 3279 struct stat sb; 3280 int error; 3281 3282 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3283 nd_flag = NOFOLLOW; 3284 else 3285 nd_flag = FOLLOW; 3286 3287 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3288 &sb); 3289 if (error) 3290 return error; 3291 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3292 } 3293 3294 static int 3295 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3296 { 3297 int error; 3298 struct pathbuf *pb; 3299 struct nameidata nd; 3300 3301 error = pathbuf_copyin(path, &pb); 3302 if (error) { 3303 return error; 3304 } 3305 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3306 if ((error = namei(&nd)) != 0) { 3307 pathbuf_destroy(pb); 3308 return error; 3309 } 3310 error = VOP_PATHCONF(nd.ni_vp, name, retval); 3311 vput(nd.ni_vp); 3312 pathbuf_destroy(pb); 3313 return error; 3314 } 3315 3316 /* 3317 * Get configurable pathname variables. 3318 */ 3319 /* ARGSUSED */ 3320 int 3321 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3322 register_t *retval) 3323 { 3324 /* { 3325 syscallarg(const char *) path; 3326 syscallarg(int) name; 3327 } */ 3328 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3329 FOLLOW); 3330 } 3331 3332 /* ARGSUSED */ 3333 int 3334 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3335 register_t *retval) 3336 { 3337 /* { 3338 syscallarg(const char *) path; 3339 syscallarg(int) name; 3340 } */ 3341 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3342 NOFOLLOW); 3343 } 3344 3345 /* 3346 * Return target name of a symbolic link. 3347 */ 3348 /* ARGSUSED */ 3349 int 3350 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3351 register_t *retval) 3352 { 3353 /* { 3354 syscallarg(const char *) path; 3355 syscallarg(char *) buf; 3356 syscallarg(size_t) count; 3357 } */ 3358 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3359 SCARG(uap, buf), SCARG(uap, count), retval); 3360 } 3361 3362 static int 3363 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3364 size_t count, register_t *retval) 3365 { 3366 struct vnode *vp; 3367 struct iovec aiov; 3368 struct uio auio; 3369 int error; 3370 struct pathbuf *pb; 3371 struct nameidata nd; 3372 3373 error = pathbuf_copyin(path, &pb); 3374 if (error) { 3375 return error; 3376 } 3377 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 3378 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3379 pathbuf_destroy(pb); 3380 return error; 3381 } 3382 vp = nd.ni_vp; 3383 pathbuf_destroy(pb); 3384 if (vp->v_type != VLNK) 3385 error = EINVAL; 3386 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3387 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3388 aiov.iov_base = buf; 3389 aiov.iov_len = count; 3390 auio.uio_iov = &aiov; 3391 auio.uio_iovcnt = 1; 3392 auio.uio_offset = 0; 3393 auio.uio_rw = UIO_READ; 3394 KASSERT(l == curlwp); 3395 auio.uio_vmspace = l->l_proc->p_vmspace; 3396 auio.uio_resid = count; 3397 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3398 *retval = count - auio.uio_resid; 3399 } 3400 vput(vp); 3401 return (error); 3402 } 3403 3404 int 3405 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3406 register_t *retval) 3407 { 3408 /* { 3409 syscallarg(int) fd; 3410 syscallarg(const char *) path; 3411 syscallarg(char *) buf; 3412 syscallarg(size_t) bufsize; 3413 } */ 3414 3415 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3416 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3417 } 3418 3419 /* 3420 * Change flags of a file given a path name. 3421 */ 3422 /* ARGSUSED */ 3423 int 3424 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3425 { 3426 /* { 3427 syscallarg(const char *) path; 3428 syscallarg(u_long) flags; 3429 } */ 3430 struct vnode *vp; 3431 int error; 3432 3433 error = namei_simple_user(SCARG(uap, path), 3434 NSM_FOLLOW_TRYEMULROOT, &vp); 3435 if (error != 0) 3436 return (error); 3437 error = change_flags(vp, SCARG(uap, flags), l); 3438 vput(vp); 3439 return (error); 3440 } 3441 3442 /* 3443 * Change flags of a file given a file descriptor. 3444 */ 3445 /* ARGSUSED */ 3446 int 3447 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3448 { 3449 /* { 3450 syscallarg(int) fd; 3451 syscallarg(u_long) flags; 3452 } */ 3453 struct vnode *vp; 3454 file_t *fp; 3455 int error; 3456 3457 /* fd_getvnode() will use the descriptor for us */ 3458 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3459 return (error); 3460 vp = fp->f_vnode; 3461 error = change_flags(vp, SCARG(uap, flags), l); 3462 VOP_UNLOCK(vp); 3463 fd_putfile(SCARG(uap, fd)); 3464 return (error); 3465 } 3466 3467 /* 3468 * Change flags of a file given a path name; this version does 3469 * not follow links. 3470 */ 3471 int 3472 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3473 { 3474 /* { 3475 syscallarg(const char *) path; 3476 syscallarg(u_long) flags; 3477 } */ 3478 struct vnode *vp; 3479 int error; 3480 3481 error = namei_simple_user(SCARG(uap, path), 3482 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3483 if (error != 0) 3484 return (error); 3485 error = change_flags(vp, SCARG(uap, flags), l); 3486 vput(vp); 3487 return (error); 3488 } 3489 3490 /* 3491 * Common routine to change flags of a file. 3492 */ 3493 int 3494 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3495 { 3496 struct vattr vattr; 3497 int error; 3498 3499 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3500 3501 vattr_null(&vattr); 3502 vattr.va_flags = flags; 3503 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3504 3505 return (error); 3506 } 3507 3508 /* 3509 * Change mode of a file given path name; this version follows links. 3510 */ 3511 /* ARGSUSED */ 3512 int 3513 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3514 { 3515 /* { 3516 syscallarg(const char *) path; 3517 syscallarg(int) mode; 3518 } */ 3519 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3520 SCARG(uap, mode), 0); 3521 } 3522 3523 int 3524 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3525 { 3526 int error; 3527 struct vnode *vp; 3528 namei_simple_flags_t ns_flag; 3529 3530 if (flags & AT_SYMLINK_NOFOLLOW) 3531 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3532 else 3533 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3534 3535 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3536 if (error != 0) 3537 return error; 3538 3539 error = change_mode(vp, mode, l); 3540 3541 vrele(vp); 3542 3543 return (error); 3544 } 3545 3546 /* 3547 * Change mode of a file given a file descriptor. 3548 */ 3549 /* ARGSUSED */ 3550 int 3551 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3552 { 3553 /* { 3554 syscallarg(int) fd; 3555 syscallarg(int) mode; 3556 } */ 3557 file_t *fp; 3558 int error; 3559 3560 /* fd_getvnode() will use the descriptor for us */ 3561 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3562 return (error); 3563 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3564 fd_putfile(SCARG(uap, fd)); 3565 return (error); 3566 } 3567 3568 int 3569 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3570 register_t *retval) 3571 { 3572 /* { 3573 syscallarg(int) fd; 3574 syscallarg(const char *) path; 3575 syscallarg(int) mode; 3576 syscallarg(int) flag; 3577 } */ 3578 3579 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3580 SCARG(uap, mode), SCARG(uap, flag)); 3581 } 3582 3583 /* 3584 * Change mode of a file given path name; this version does not follow links. 3585 */ 3586 /* ARGSUSED */ 3587 int 3588 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3589 { 3590 /* { 3591 syscallarg(const char *) path; 3592 syscallarg(int) mode; 3593 } */ 3594 int error; 3595 struct vnode *vp; 3596 3597 error = namei_simple_user(SCARG(uap, path), 3598 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3599 if (error != 0) 3600 return (error); 3601 3602 error = change_mode(vp, SCARG(uap, mode), l); 3603 3604 vrele(vp); 3605 return (error); 3606 } 3607 3608 /* 3609 * Common routine to set mode given a vnode. 3610 */ 3611 static int 3612 change_mode(struct vnode *vp, int mode, struct lwp *l) 3613 { 3614 struct vattr vattr; 3615 int error; 3616 3617 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3618 vattr_null(&vattr); 3619 vattr.va_mode = mode & ALLPERMS; 3620 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3621 VOP_UNLOCK(vp); 3622 return (error); 3623 } 3624 3625 /* 3626 * Set ownership given a path name; this version follows links. 3627 */ 3628 /* ARGSUSED */ 3629 int 3630 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3631 { 3632 /* { 3633 syscallarg(const char *) path; 3634 syscallarg(uid_t) uid; 3635 syscallarg(gid_t) gid; 3636 } */ 3637 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3638 SCARG(uap, gid), 0); 3639 } 3640 3641 int 3642 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3643 gid_t gid, int flags) 3644 { 3645 int error; 3646 struct vnode *vp; 3647 namei_simple_flags_t ns_flag; 3648 3649 if (flags & AT_SYMLINK_NOFOLLOW) 3650 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3651 else 3652 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3653 3654 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3655 if (error != 0) 3656 return error; 3657 3658 error = change_owner(vp, uid, gid, l, 0); 3659 3660 vrele(vp); 3661 3662 return (error); 3663 } 3664 3665 /* 3666 * Set ownership given a path name; this version follows links. 3667 * Provides POSIX semantics. 3668 */ 3669 /* ARGSUSED */ 3670 int 3671 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3672 { 3673 /* { 3674 syscallarg(const char *) path; 3675 syscallarg(uid_t) uid; 3676 syscallarg(gid_t) gid; 3677 } */ 3678 int error; 3679 struct vnode *vp; 3680 3681 error = namei_simple_user(SCARG(uap, path), 3682 NSM_FOLLOW_TRYEMULROOT, &vp); 3683 if (error != 0) 3684 return (error); 3685 3686 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3687 3688 vrele(vp); 3689 return (error); 3690 } 3691 3692 /* 3693 * Set ownership given a file descriptor. 3694 */ 3695 /* ARGSUSED */ 3696 int 3697 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3698 { 3699 /* { 3700 syscallarg(int) fd; 3701 syscallarg(uid_t) uid; 3702 syscallarg(gid_t) gid; 3703 } */ 3704 int error; 3705 file_t *fp; 3706 3707 /* fd_getvnode() will use the descriptor for us */ 3708 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3709 return (error); 3710 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3711 l, 0); 3712 fd_putfile(SCARG(uap, fd)); 3713 return (error); 3714 } 3715 3716 int 3717 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3718 register_t *retval) 3719 { 3720 /* { 3721 syscallarg(int) fd; 3722 syscallarg(const char *) path; 3723 syscallarg(uid_t) owner; 3724 syscallarg(gid_t) group; 3725 syscallarg(int) flag; 3726 } */ 3727 3728 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3729 SCARG(uap, owner), SCARG(uap, group), 3730 SCARG(uap, flag)); 3731 } 3732 3733 /* 3734 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3735 */ 3736 /* ARGSUSED */ 3737 int 3738 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3739 { 3740 /* { 3741 syscallarg(int) fd; 3742 syscallarg(uid_t) uid; 3743 syscallarg(gid_t) gid; 3744 } */ 3745 int error; 3746 file_t *fp; 3747 3748 /* fd_getvnode() will use the descriptor for us */ 3749 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3750 return (error); 3751 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3752 l, 1); 3753 fd_putfile(SCARG(uap, fd)); 3754 return (error); 3755 } 3756 3757 /* 3758 * Set ownership given a path name; this version does not follow links. 3759 */ 3760 /* ARGSUSED */ 3761 int 3762 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3763 { 3764 /* { 3765 syscallarg(const char *) path; 3766 syscallarg(uid_t) uid; 3767 syscallarg(gid_t) gid; 3768 } */ 3769 int error; 3770 struct vnode *vp; 3771 3772 error = namei_simple_user(SCARG(uap, path), 3773 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3774 if (error != 0) 3775 return (error); 3776 3777 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3778 3779 vrele(vp); 3780 return (error); 3781 } 3782 3783 /* 3784 * Set ownership given a path name; this version does not follow links. 3785 * Provides POSIX/XPG semantics. 3786 */ 3787 /* ARGSUSED */ 3788 int 3789 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3790 { 3791 /* { 3792 syscallarg(const char *) path; 3793 syscallarg(uid_t) uid; 3794 syscallarg(gid_t) gid; 3795 } */ 3796 int error; 3797 struct vnode *vp; 3798 3799 error = namei_simple_user(SCARG(uap, path), 3800 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3801 if (error != 0) 3802 return (error); 3803 3804 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3805 3806 vrele(vp); 3807 return (error); 3808 } 3809 3810 /* 3811 * Common routine to set ownership given a vnode. 3812 */ 3813 static int 3814 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3815 int posix_semantics) 3816 { 3817 struct vattr vattr; 3818 mode_t newmode; 3819 int error; 3820 3821 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3822 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3823 goto out; 3824 3825 #define CHANGED(x) ((int)(x) != -1) 3826 newmode = vattr.va_mode; 3827 if (posix_semantics) { 3828 /* 3829 * POSIX/XPG semantics: if the caller is not the super-user, 3830 * clear set-user-id and set-group-id bits. Both POSIX and 3831 * the XPG consider the behaviour for calls by the super-user 3832 * implementation-defined; we leave the set-user-id and set- 3833 * group-id settings intact in that case. 3834 */ 3835 if (vattr.va_mode & S_ISUID) { 3836 if (kauth_authorize_vnode(l->l_cred, 3837 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3838 newmode &= ~S_ISUID; 3839 } 3840 if (vattr.va_mode & S_ISGID) { 3841 if (kauth_authorize_vnode(l->l_cred, 3842 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3843 newmode &= ~S_ISGID; 3844 } 3845 } else { 3846 /* 3847 * NetBSD semantics: when changing owner and/or group, 3848 * clear the respective bit(s). 3849 */ 3850 if (CHANGED(uid)) 3851 newmode &= ~S_ISUID; 3852 if (CHANGED(gid)) 3853 newmode &= ~S_ISGID; 3854 } 3855 /* Update va_mode iff altered. */ 3856 if (vattr.va_mode == newmode) 3857 newmode = VNOVAL; 3858 3859 vattr_null(&vattr); 3860 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3861 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3862 vattr.va_mode = newmode; 3863 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3864 #undef CHANGED 3865 3866 out: 3867 VOP_UNLOCK(vp); 3868 return (error); 3869 } 3870 3871 /* 3872 * Set the access and modification times given a path name; this 3873 * version follows links. 3874 */ 3875 /* ARGSUSED */ 3876 int 3877 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3878 register_t *retval) 3879 { 3880 /* { 3881 syscallarg(const char *) path; 3882 syscallarg(const struct timeval *) tptr; 3883 } */ 3884 3885 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3886 SCARG(uap, tptr), UIO_USERSPACE); 3887 } 3888 3889 /* 3890 * Set the access and modification times given a file descriptor. 3891 */ 3892 /* ARGSUSED */ 3893 int 3894 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3895 register_t *retval) 3896 { 3897 /* { 3898 syscallarg(int) fd; 3899 syscallarg(const struct timeval *) tptr; 3900 } */ 3901 int error; 3902 file_t *fp; 3903 3904 /* fd_getvnode() will use the descriptor for us */ 3905 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3906 return (error); 3907 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3908 UIO_USERSPACE); 3909 fd_putfile(SCARG(uap, fd)); 3910 return (error); 3911 } 3912 3913 int 3914 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3915 register_t *retval) 3916 { 3917 /* { 3918 syscallarg(int) fd; 3919 syscallarg(const struct timespec *) tptr; 3920 } */ 3921 int error; 3922 file_t *fp; 3923 3924 /* fd_getvnode() will use the descriptor for us */ 3925 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3926 return (error); 3927 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3928 SCARG(uap, tptr), UIO_USERSPACE); 3929 fd_putfile(SCARG(uap, fd)); 3930 return (error); 3931 } 3932 3933 /* 3934 * Set the access and modification times given a path name; this 3935 * version does not follow links. 3936 */ 3937 int 3938 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3939 register_t *retval) 3940 { 3941 /* { 3942 syscallarg(const char *) path; 3943 syscallarg(const struct timeval *) tptr; 3944 } */ 3945 3946 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3947 SCARG(uap, tptr), UIO_USERSPACE); 3948 } 3949 3950 int 3951 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3952 register_t *retval) 3953 { 3954 /* { 3955 syscallarg(int) fd; 3956 syscallarg(const char *) path; 3957 syscallarg(const struct timespec *) tptr; 3958 syscallarg(int) flag; 3959 } */ 3960 int follow; 3961 const struct timespec *tptr; 3962 int error; 3963 3964 tptr = SCARG(uap, tptr); 3965 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3966 3967 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3968 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3969 3970 return error; 3971 } 3972 3973 /* 3974 * Common routine to set access and modification times given a vnode. 3975 */ 3976 int 3977 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3978 const struct timespec *tptr, enum uio_seg seg) 3979 { 3980 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3981 } 3982 3983 int 3984 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3985 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3986 { 3987 struct vattr vattr; 3988 int error, dorele = 0; 3989 namei_simple_flags_t sflags; 3990 bool vanull, setbirthtime; 3991 struct timespec ts[2]; 3992 3993 KASSERT(l != NULL || fdat == AT_FDCWD); 3994 3995 /* 3996 * I have checked all callers and they pass either FOLLOW, 3997 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3998 * is 0. More to the point, they don't pass anything else. 3999 * Let's keep it that way at least until the namei interfaces 4000 * are fully sanitized. 4001 */ 4002 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 4003 sflags = (flag == FOLLOW) ? 4004 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 4005 4006 if (tptr == NULL) { 4007 vanull = true; 4008 nanotime(&ts[0]); 4009 ts[1] = ts[0]; 4010 } else { 4011 vanull = false; 4012 if (seg != UIO_SYSSPACE) { 4013 error = copyin(tptr, ts, sizeof (ts)); 4014 if (error != 0) 4015 return error; 4016 } else { 4017 ts[0] = tptr[0]; 4018 ts[1] = tptr[1]; 4019 } 4020 } 4021 4022 if (ts[0].tv_nsec == UTIME_NOW) { 4023 nanotime(&ts[0]); 4024 if (ts[1].tv_nsec == UTIME_NOW) { 4025 vanull = true; 4026 ts[1] = ts[0]; 4027 } 4028 } else if (ts[1].tv_nsec == UTIME_NOW) 4029 nanotime(&ts[1]); 4030 4031 if (vp == NULL) { 4032 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4033 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4034 if (error != 0) 4035 return error; 4036 dorele = 1; 4037 } 4038 4039 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4040 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4041 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4042 vattr_null(&vattr); 4043 4044 if (ts[0].tv_nsec != UTIME_OMIT) 4045 vattr.va_atime = ts[0]; 4046 4047 if (ts[1].tv_nsec != UTIME_OMIT) { 4048 vattr.va_mtime = ts[1]; 4049 if (setbirthtime) 4050 vattr.va_birthtime = ts[1]; 4051 } 4052 4053 if (vanull) 4054 vattr.va_vaflags |= VA_UTIMES_NULL; 4055 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4056 VOP_UNLOCK(vp); 4057 4058 if (dorele != 0) 4059 vrele(vp); 4060 4061 return error; 4062 } 4063 4064 int 4065 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4066 const struct timeval *tptr, enum uio_seg seg) 4067 { 4068 struct timespec ts[2]; 4069 struct timespec *tsptr = NULL; 4070 int error; 4071 4072 if (tptr != NULL) { 4073 struct timeval tv[2]; 4074 4075 if (seg != UIO_SYSSPACE) { 4076 error = copyin(tptr, tv, sizeof(tv)); 4077 if (error != 0) 4078 return error; 4079 tptr = tv; 4080 } 4081 4082 if ((tptr[0].tv_usec == UTIME_NOW) || 4083 (tptr[0].tv_usec == UTIME_OMIT)) 4084 ts[0].tv_nsec = tptr[0].tv_usec; 4085 else { 4086 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4087 return EINVAL; 4088 4089 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4090 } 4091 4092 if ((tptr[1].tv_usec == UTIME_NOW) || 4093 (tptr[1].tv_usec == UTIME_OMIT)) 4094 ts[1].tv_nsec = tptr[1].tv_usec; 4095 else { 4096 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4097 return EINVAL; 4098 4099 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4100 } 4101 4102 tsptr = &ts[0]; 4103 } 4104 4105 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4106 } 4107 4108 /* 4109 * Truncate a file given its path name. 4110 */ 4111 /* ARGSUSED */ 4112 int 4113 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 4114 { 4115 /* { 4116 syscallarg(const char *) path; 4117 syscallarg(int) pad; 4118 syscallarg(off_t) length; 4119 } */ 4120 struct vnode *vp; 4121 struct vattr vattr; 4122 int error; 4123 4124 if (SCARG(uap, length) < 0) 4125 return EINVAL; 4126 4127 error = namei_simple_user(SCARG(uap, path), 4128 NSM_FOLLOW_TRYEMULROOT, &vp); 4129 if (error != 0) 4130 return (error); 4131 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4132 if (vp->v_type == VDIR) 4133 error = EISDIR; 4134 else if ((error = vn_writechk(vp)) == 0 && 4135 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4136 vattr_null(&vattr); 4137 vattr.va_size = SCARG(uap, length); 4138 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4139 } 4140 vput(vp); 4141 return (error); 4142 } 4143 4144 /* 4145 * Truncate a file given a file descriptor. 4146 */ 4147 /* ARGSUSED */ 4148 int 4149 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 4150 { 4151 /* { 4152 syscallarg(int) fd; 4153 syscallarg(int) pad; 4154 syscallarg(off_t) length; 4155 } */ 4156 file_t *fp; 4157 int error, fd = SCARG(uap, fd); 4158 4159 fp = fd_getfile(fd); 4160 if (fp == NULL) 4161 return EBADF; 4162 if (fp->f_ops->fo_truncate == NULL) 4163 error = EOPNOTSUPP; 4164 else 4165 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length)); 4166 4167 fd_putfile(fd); 4168 return error; 4169 } 4170 4171 /* 4172 * Sync an open file. 4173 */ 4174 /* ARGSUSED */ 4175 int 4176 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4177 { 4178 /* { 4179 syscallarg(int) fd; 4180 } */ 4181 struct vnode *vp; 4182 file_t *fp; 4183 int error; 4184 4185 /* fd_getvnode() will use the descriptor for us */ 4186 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4187 return (error); 4188 vp = fp->f_vnode; 4189 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4190 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4191 VOP_UNLOCK(vp); 4192 fd_putfile(SCARG(uap, fd)); 4193 return (error); 4194 } 4195 4196 /* 4197 * Sync a range of file data. API modeled after that found in AIX. 4198 * 4199 * FDATASYNC indicates that we need only save enough metadata to be able 4200 * to re-read the written data. 4201 */ 4202 /* ARGSUSED */ 4203 int 4204 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4205 { 4206 /* { 4207 syscallarg(int) fd; 4208 syscallarg(int) flags; 4209 syscallarg(off_t) start; 4210 syscallarg(off_t) length; 4211 } */ 4212 struct vnode *vp; 4213 file_t *fp; 4214 int flags, nflags; 4215 off_t s, e, len; 4216 int error; 4217 4218 /* fd_getvnode() will use the descriptor for us */ 4219 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4220 return (error); 4221 4222 if ((fp->f_flag & FWRITE) == 0) { 4223 error = EBADF; 4224 goto out; 4225 } 4226 4227 flags = SCARG(uap, flags); 4228 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4229 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4230 error = EINVAL; 4231 goto out; 4232 } 4233 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4234 if (flags & FDATASYNC) 4235 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4236 else 4237 nflags = FSYNC_WAIT; 4238 if (flags & FDISKSYNC) 4239 nflags |= FSYNC_CACHE; 4240 4241 len = SCARG(uap, length); 4242 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4243 if (len) { 4244 s = SCARG(uap, start); 4245 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4246 error = EINVAL; 4247 goto out; 4248 } 4249 e = s + len; 4250 KASSERT(s <= e); 4251 } else { 4252 e = 0; 4253 s = 0; 4254 } 4255 4256 vp = fp->f_vnode; 4257 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4258 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4259 VOP_UNLOCK(vp); 4260 out: 4261 fd_putfile(SCARG(uap, fd)); 4262 return (error); 4263 } 4264 4265 /* 4266 * Sync the data of an open file. 4267 */ 4268 /* ARGSUSED */ 4269 int 4270 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4271 { 4272 /* { 4273 syscallarg(int) fd; 4274 } */ 4275 struct vnode *vp; 4276 file_t *fp; 4277 int error; 4278 4279 /* fd_getvnode() will use the descriptor for us */ 4280 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4281 return (error); 4282 vp = fp->f_vnode; 4283 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4284 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4285 VOP_UNLOCK(vp); 4286 fd_putfile(SCARG(uap, fd)); 4287 return (error); 4288 } 4289 4290 /* 4291 * Rename files, (standard) BSD semantics frontend. 4292 */ 4293 /* ARGSUSED */ 4294 int 4295 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4296 { 4297 /* { 4298 syscallarg(const char *) from; 4299 syscallarg(const char *) to; 4300 } */ 4301 4302 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4303 SCARG(uap, to), UIO_USERSPACE, 0)); 4304 } 4305 4306 int 4307 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4308 register_t *retval) 4309 { 4310 /* { 4311 syscallarg(int) fromfd; 4312 syscallarg(const char *) from; 4313 syscallarg(int) tofd; 4314 syscallarg(const char *) to; 4315 } */ 4316 4317 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4318 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4319 } 4320 4321 /* 4322 * Rename files, POSIX semantics frontend. 4323 */ 4324 /* ARGSUSED */ 4325 int 4326 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4327 { 4328 /* { 4329 syscallarg(const char *) from; 4330 syscallarg(const char *) to; 4331 } */ 4332 4333 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4334 SCARG(uap, to), UIO_USERSPACE, 1)); 4335 } 4336 4337 /* 4338 * Rename files. Source and destination must either both be directories, 4339 * or both not be directories. If target is a directory, it must be empty. 4340 * If `from' and `to' refer to the same object, the value of the `retain' 4341 * argument is used to determine whether `from' will be 4342 * 4343 * (retain == 0) deleted unless `from' and `to' refer to the same 4344 * object in the file system's name space (BSD). 4345 * (retain == 1) always retained (POSIX). 4346 * 4347 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4348 */ 4349 int 4350 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4351 { 4352 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4353 } 4354 4355 static int 4356 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4357 const char *to, enum uio_seg seg, int retain) 4358 { 4359 struct pathbuf *fpb, *tpb; 4360 struct nameidata fnd, tnd; 4361 struct vnode *fdvp, *fvp; 4362 struct vnode *tdvp, *tvp; 4363 struct mount *mp, *tmp; 4364 int error; 4365 4366 KASSERT(l != NULL || fromfd == AT_FDCWD); 4367 KASSERT(l != NULL || tofd == AT_FDCWD); 4368 4369 error = pathbuf_maybe_copyin(from, seg, &fpb); 4370 if (error) 4371 goto out0; 4372 KASSERT(fpb != NULL); 4373 4374 error = pathbuf_maybe_copyin(to, seg, &tpb); 4375 if (error) 4376 goto out1; 4377 KASSERT(tpb != NULL); 4378 4379 /* 4380 * Lookup from. 4381 * 4382 * XXX LOCKPARENT is wrong because we don't actually want it 4383 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4384 * insane, so for the time being we need to leave it like this. 4385 */ 4386 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4387 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4388 goto out2; 4389 4390 /* 4391 * Pull out the important results of the lookup, fdvp and fvp. 4392 * Of course, fvp is bogus because we're about to unlock fdvp. 4393 */ 4394 fdvp = fnd.ni_dvp; 4395 fvp = fnd.ni_vp; 4396 mp = fdvp->v_mount; 4397 KASSERT(fdvp != NULL); 4398 KASSERT(fvp != NULL); 4399 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4400 /* 4401 * Bracket the operation with fstrans_start()/fstrans_done(). 4402 * 4403 * Inside the bracket this file system cannot be unmounted so 4404 * a vnode on this file system cannot change its v_mount. 4405 * A vnode on another file system may still change to dead mount. 4406 */ 4407 fstrans_start(mp); 4408 4409 /* 4410 * Make sure neither fdvp nor fvp is locked. 4411 */ 4412 if (fdvp != fvp) 4413 VOP_UNLOCK(fdvp); 4414 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4415 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4416 4417 /* 4418 * Reject renaming `.' and `..'. Can't do this until after 4419 * namei because we need namei's parsing to find the final 4420 * component name. (namei should just leave us with the final 4421 * component name and not look it up itself, but anyway...) 4422 * 4423 * This was here before because we used to relookup from 4424 * instead of to and relookup requires the caller to check 4425 * this, but now file systems may depend on this check, so we 4426 * must retain it until the file systems are all rototilled. 4427 */ 4428 if (((fnd.ni_cnd.cn_namelen == 1) && 4429 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4430 ((fnd.ni_cnd.cn_namelen == 2) && 4431 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4432 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4433 error = EINVAL; /* XXX EISDIR? */ 4434 goto abort0; 4435 } 4436 4437 /* 4438 * Lookup to. 4439 * 4440 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4441 * fvp here to decide whether to add CREATEDIR is a load of 4442 * bollocks because fvp might be the wrong node by now, since 4443 * fdvp is unlocked. 4444 * 4445 * XXX Why not pass CREATEDIR always? 4446 */ 4447 NDINIT(&tnd, RENAME, 4448 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4449 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4450 tpb); 4451 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4452 goto abort0; 4453 4454 /* 4455 * Pull out the important results of the lookup, tdvp and tvp. 4456 * Of course, tvp is bogus because we're about to unlock tdvp. 4457 */ 4458 tdvp = tnd.ni_dvp; 4459 tvp = tnd.ni_vp; 4460 KASSERT(tdvp != NULL); 4461 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4462 4463 if (fvp->v_type == VDIR) 4464 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4465 /* 4466 * Make sure neither tdvp nor tvp is locked. 4467 */ 4468 if (tdvp != tvp) 4469 VOP_UNLOCK(tdvp); 4470 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4471 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4472 4473 /* 4474 * Reject renaming onto `.' or `..'. relookup is unhappy with 4475 * these, which is why we must do this here. Once upon a time 4476 * we relooked up from instead of to, and consequently didn't 4477 * need this check, but now that we relookup to instead of 4478 * from, we need this; and we shall need it forever forward 4479 * until the VOP_RENAME protocol changes, because file systems 4480 * will no doubt begin to depend on this check. 4481 */ 4482 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4483 error = EISDIR; 4484 goto abort1; 4485 } 4486 if ((tnd.ni_cnd.cn_namelen == 2) && 4487 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4488 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4489 error = EINVAL; 4490 goto abort1; 4491 } 4492 4493 /* 4494 * Make sure the mount points match. Although we don't hold 4495 * any vnode locks, the v_mount on fdvp file system are stable. 4496 * 4497 * Unmounting another file system at an inopportune moment may 4498 * cause tdvp to disappear and change its v_mount to dead. 4499 * 4500 * So in either case different v_mount means cross-device rename. 4501 */ 4502 KASSERT(mp != NULL); 4503 tmp = tdvp->v_mount; 4504 4505 if (mp != tmp) { 4506 error = EXDEV; 4507 goto abort1; 4508 } 4509 4510 /* 4511 * Take the vfs rename lock to avoid cross-directory screw cases. 4512 * Nothing is locked currently, so taking this lock is safe. 4513 */ 4514 error = VFS_RENAMELOCK_ENTER(mp); 4515 if (error) 4516 goto abort1; 4517 4518 /* 4519 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4520 * and nothing is locked except for the vfs rename lock. 4521 * 4522 * The next step is a little rain dance to conform to the 4523 * insane lock protocol, even though it does nothing to ward 4524 * off race conditions. 4525 * 4526 * We need tdvp and tvp to be locked. However, because we have 4527 * unlocked tdvp in order to hold no locks while we take the 4528 * vfs rename lock, tvp may be wrong here, and we can't safely 4529 * lock it even if the sensible file systems will just unlock 4530 * it straight away. Consequently, we must lock tdvp and then 4531 * relookup tvp to get it locked. 4532 * 4533 * Finally, because the VOP_RENAME protocol is brain-damaged 4534 * and various file systems insanely depend on the semantics of 4535 * this brain damage, the lookup of to must be the last lookup 4536 * before VOP_RENAME. 4537 */ 4538 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4539 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4540 if (error) 4541 goto abort2; 4542 4543 /* 4544 * Drop the old tvp and pick up the new one -- which might be 4545 * the same, but that doesn't matter to us. After this, tdvp 4546 * and tvp should both be locked. 4547 */ 4548 if (tvp != NULL) 4549 vrele(tvp); 4550 tvp = tnd.ni_vp; 4551 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4552 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4553 4554 /* 4555 * The old do_sys_rename had various consistency checks here 4556 * involving fvp and tvp. fvp is bogus already here, and tvp 4557 * will become bogus soon in any sensible file system, so the 4558 * only purpose in putting these checks here is to give lip 4559 * service to these screw cases and to acknowledge that they 4560 * exist, not actually to handle them, but here you go 4561 * anyway... 4562 */ 4563 4564 /* 4565 * Acknowledge that directories and non-directories aren't 4566 * supposed to mix. 4567 */ 4568 if (tvp != NULL) { 4569 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4570 error = ENOTDIR; 4571 goto abort3; 4572 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4573 error = EISDIR; 4574 goto abort3; 4575 } 4576 } 4577 4578 /* 4579 * Acknowledge some random screw case, among the dozens that 4580 * might arise. 4581 */ 4582 if (fvp == tdvp) { 4583 error = EINVAL; 4584 goto abort3; 4585 } 4586 4587 /* 4588 * Acknowledge that POSIX has a wacky screw case. 4589 * 4590 * XXX Eventually the retain flag needs to be passed on to 4591 * VOP_RENAME. 4592 */ 4593 if (fvp == tvp) { 4594 if (retain) { 4595 error = 0; 4596 goto abort3; 4597 } else if ((fdvp == tdvp) && 4598 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4599 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4600 fnd.ni_cnd.cn_namelen))) { 4601 error = 0; 4602 goto abort3; 4603 } 4604 } 4605 4606 /* 4607 * Make sure veriexec can screw us up. (But a race can screw 4608 * up veriexec, of course -- remember, fvp and (soon) tvp are 4609 * bogus.) 4610 */ 4611 #if NVERIEXEC > 0 4612 { 4613 char *f1, *f2; 4614 size_t f1_len; 4615 size_t f2_len; 4616 4617 f1_len = fnd.ni_cnd.cn_namelen + 1; 4618 f1 = kmem_alloc(f1_len, KM_SLEEP); 4619 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4620 4621 f2_len = tnd.ni_cnd.cn_namelen + 1; 4622 f2 = kmem_alloc(f2_len, KM_SLEEP); 4623 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4624 4625 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4626 4627 kmem_free(f1, f1_len); 4628 kmem_free(f2, f2_len); 4629 4630 if (error) 4631 goto abort3; 4632 } 4633 #endif /* NVERIEXEC > 0 */ 4634 4635 /* 4636 * All ready. Incant the rename vop. 4637 */ 4638 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4639 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4640 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4641 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4642 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4643 4644 /* 4645 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4646 * tdvp and tvp. But we can't assert any of that. 4647 */ 4648 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4649 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4650 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4651 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4652 4653 /* 4654 * So all we have left to do is to drop the rename lock and 4655 * destroy the pathbufs. 4656 */ 4657 VFS_RENAMELOCK_EXIT(mp); 4658 fstrans_done(mp); 4659 goto out2; 4660 4661 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4662 VOP_UNLOCK(tvp); 4663 abort2: VOP_UNLOCK(tdvp); 4664 VFS_RENAMELOCK_EXIT(mp); 4665 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4666 vrele(tdvp); 4667 if (tvp != NULL) 4668 vrele(tvp); 4669 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4670 vrele(fdvp); 4671 vrele(fvp); 4672 fstrans_done(mp); 4673 out2: pathbuf_destroy(tpb); 4674 out1: pathbuf_destroy(fpb); 4675 out0: return error; 4676 } 4677 4678 /* 4679 * Make a directory file. 4680 */ 4681 /* ARGSUSED */ 4682 int 4683 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4684 { 4685 /* { 4686 syscallarg(const char *) path; 4687 syscallarg(int) mode; 4688 } */ 4689 4690 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4691 SCARG(uap, mode), UIO_USERSPACE); 4692 } 4693 4694 int 4695 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4696 register_t *retval) 4697 { 4698 /* { 4699 syscallarg(int) fd; 4700 syscallarg(const char *) path; 4701 syscallarg(int) mode; 4702 } */ 4703 4704 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4705 SCARG(uap, mode), UIO_USERSPACE); 4706 } 4707 4708 4709 int 4710 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4711 { 4712 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4713 } 4714 4715 static int 4716 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4717 enum uio_seg seg) 4718 { 4719 struct proc *p = curlwp->l_proc; 4720 struct vnode *vp; 4721 struct vattr vattr; 4722 int error; 4723 struct pathbuf *pb; 4724 struct nameidata nd; 4725 4726 KASSERT(l != NULL || fdat == AT_FDCWD); 4727 4728 /* XXX bollocks, should pass in a pathbuf */ 4729 error = pathbuf_maybe_copyin(path, seg, &pb); 4730 if (error) { 4731 return error; 4732 } 4733 4734 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4735 4736 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4737 pathbuf_destroy(pb); 4738 return (error); 4739 } 4740 vp = nd.ni_vp; 4741 if (vp != NULL) { 4742 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4743 if (nd.ni_dvp == vp) 4744 vrele(nd.ni_dvp); 4745 else 4746 vput(nd.ni_dvp); 4747 vrele(vp); 4748 pathbuf_destroy(pb); 4749 return (EEXIST); 4750 } 4751 vattr_null(&vattr); 4752 vattr.va_type = VDIR; 4753 /* We will read cwdi->cwdi_cmask unlocked. */ 4754 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4755 nd.ni_cnd.cn_flags |= WILLBEDIR; 4756 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4757 if (!error) 4758 vrele(nd.ni_vp); 4759 vput(nd.ni_dvp); 4760 pathbuf_destroy(pb); 4761 return (error); 4762 } 4763 4764 /* 4765 * Remove a directory file. 4766 */ 4767 /* ARGSUSED */ 4768 int 4769 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4770 { 4771 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4772 AT_REMOVEDIR, UIO_USERSPACE); 4773 } 4774 4775 /* 4776 * Read a block of directory entries in a file system independent format. 4777 */ 4778 int 4779 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4780 { 4781 /* { 4782 syscallarg(int) fd; 4783 syscallarg(char *) buf; 4784 syscallarg(size_t) count; 4785 } */ 4786 file_t *fp; 4787 int error, done; 4788 4789 /* fd_getvnode() will use the descriptor for us */ 4790 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4791 return (error); 4792 if ((fp->f_flag & FREAD) == 0) { 4793 error = EBADF; 4794 goto out; 4795 } 4796 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4797 SCARG(uap, count), &done, l, 0, 0); 4798 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4799 *retval = done; 4800 out: 4801 fd_putfile(SCARG(uap, fd)); 4802 return (error); 4803 } 4804 4805 /* 4806 * Set the mode mask for creation of filesystem nodes. 4807 */ 4808 int 4809 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4810 { 4811 /* { 4812 syscallarg(mode_t) newmask; 4813 } */ 4814 4815 /* 4816 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4817 * serialization with those reads is required. It's important to 4818 * return a coherent answer for the caller of umask() though, and 4819 * the atomic operation accomplishes that. 4820 */ 4821 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4822 SCARG(uap, newmask) & ALLPERMS); 4823 4824 return (0); 4825 } 4826 4827 int 4828 dorevoke(struct vnode *vp, kauth_cred_t cred) 4829 { 4830 struct vattr vattr; 4831 int error, fs_decision; 4832 4833 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4834 error = VOP_GETATTR(vp, &vattr, cred); 4835 VOP_UNLOCK(vp); 4836 if (error != 0) 4837 return error; 4838 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4839 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4840 fs_decision); 4841 if (!error) 4842 VOP_REVOKE(vp, REVOKEALL); 4843 return (error); 4844 } 4845 4846 /* 4847 * Void all references to file by ripping underlying filesystem 4848 * away from vnode. 4849 */ 4850 /* ARGSUSED */ 4851 int 4852 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4853 { 4854 /* { 4855 syscallarg(const char *) path; 4856 } */ 4857 struct vnode *vp; 4858 int error; 4859 4860 error = namei_simple_user(SCARG(uap, path), 4861 NSM_FOLLOW_TRYEMULROOT, &vp); 4862 if (error != 0) 4863 return (error); 4864 error = dorevoke(vp, l->l_cred); 4865 vrele(vp); 4866 return (error); 4867 } 4868 4869 /* 4870 * Allocate backing store for a file, filling a hole without having to 4871 * explicitly write anything out. 4872 */ 4873 /* ARGSUSED */ 4874 int 4875 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4876 register_t *retval) 4877 { 4878 /* { 4879 syscallarg(int) fd; 4880 syscallarg(off_t) pos; 4881 syscallarg(off_t) len; 4882 } */ 4883 int fd; 4884 off_t pos, len; 4885 struct file *fp; 4886 struct vnode *vp; 4887 int error; 4888 4889 fd = SCARG(uap, fd); 4890 pos = SCARG(uap, pos); 4891 len = SCARG(uap, len); 4892 4893 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4894 *retval = EINVAL; 4895 return 0; 4896 } 4897 4898 error = fd_getvnode(fd, &fp); 4899 if (error) { 4900 *retval = error; 4901 return 0; 4902 } 4903 if ((fp->f_flag & FWRITE) == 0) { 4904 error = EBADF; 4905 goto fail; 4906 } 4907 vp = fp->f_vnode; 4908 4909 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4910 if (vp->v_type == VDIR) { 4911 error = EISDIR; 4912 } else { 4913 error = VOP_FALLOCATE(vp, pos, len); 4914 } 4915 VOP_UNLOCK(vp); 4916 4917 fail: 4918 fd_putfile(fd); 4919 *retval = error; 4920 return 0; 4921 } 4922 4923 /* 4924 * Deallocate backing store for a file, creating a hole. Also used for 4925 * invoking TRIM on disks. 4926 */ 4927 /* ARGSUSED */ 4928 int 4929 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4930 register_t *retval) 4931 { 4932 /* { 4933 syscallarg(int) fd; 4934 syscallarg(off_t) pos; 4935 syscallarg(off_t) len; 4936 } */ 4937 int fd; 4938 off_t pos, len; 4939 struct file *fp; 4940 struct vnode *vp; 4941 int error; 4942 4943 fd = SCARG(uap, fd); 4944 pos = SCARG(uap, pos); 4945 len = SCARG(uap, len); 4946 4947 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4948 return EINVAL; 4949 } 4950 4951 error = fd_getvnode(fd, &fp); 4952 if (error) { 4953 return error; 4954 } 4955 if ((fp->f_flag & FWRITE) == 0) { 4956 error = EBADF; 4957 goto fail; 4958 } 4959 vp = fp->f_vnode; 4960 4961 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4962 if (vp->v_type == VDIR) { 4963 error = EISDIR; 4964 } else { 4965 error = VOP_FDISCARD(vp, pos, len); 4966 } 4967 VOP_UNLOCK(vp); 4968 4969 fail: 4970 fd_putfile(fd); 4971 return error; 4972 } 4973