1 /* $NetBSD: vfs_syscalls.c,v 1.560 2023/07/10 02:31:55 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.560 2023/07/10 02:31:55 christos Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/fstrans.h> 91 #include <sys/proc.h> 92 #include <sys/uio.h> 93 #include <sys/kmem.h> 94 #include <sys/dirent.h> 95 #include <sys/sysctl.h> 96 #include <sys/syscallargs.h> 97 #include <sys/vfs_syscalls.h> 98 #include <sys/quota.h> 99 #include <sys/quotactl.h> 100 #include <sys/ktrace.h> 101 #ifdef FILEASSOC 102 #include <sys/fileassoc.h> 103 #endif /* FILEASSOC */ 104 #include <sys/extattr.h> 105 #include <sys/verified_exec.h> 106 #include <sys/kauth.h> 107 #include <sys/atomic.h> 108 #include <sys/module.h> 109 #include <sys/buf.h> 110 #include <sys/event.h> 111 #include <sys/compat_stub.h> 112 113 #include <miscfs/genfs/genfs.h> 114 #include <miscfs/specfs/specdev.h> 115 116 #include <nfs/rpcv2.h> 117 #include <nfs/nfsproto.h> 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 121 /* XXX this shouldn't be here */ 122 #ifndef OFF_T_MAX 123 #define OFF_T_MAX __type_max(off_t) 124 #endif 125 126 static int change_flags(struct vnode *, u_long, struct lwp *); 127 static int change_mode(struct vnode *, int, struct lwp *); 128 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 129 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 130 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 131 enum uio_seg); 132 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 133 static int 
do_sys_symlinkat(struct lwp *, const char *, int, const char *,
    enum uio_seg);
static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *,
    enum uio_seg, int);
static int do_sys_readlinkat(struct lwp *, int, const char *, char *,
    size_t, register_t *);
static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg);

/*
 * Lookup helpers taking a directory file descriptor argument
 * (the int parameter) in addition to the path / nameidata.
 */
static int fd_nameiat(struct lwp *, int, struct nameidata *);
static int fd_nameiat_simple_user(struct lwp *, int, const char *,
    namei_simple_flags_t, struct vnode **);

/*
 * This table is used to maintain compatibility with 4.3BSD
 * and NetBSD 0.9 mount syscalls - and possibly other systems.
 * Note, the order is important!  The array index is the historic
 * numeric filesystem type; a NULL slot means the number has no
 * filesystem name associated with it here.
 *
 * Do not modify this table. It should only contain filesystems
 * supported by NetBSD 0.9 and 4.3BSD.
 */
const char * const mountcompatnames[] = {
	NULL,		/* 0 = MOUNT_NONE */
	MOUNT_FFS,	/* 1 = MOUNT_UFS */
	MOUNT_NFS,	/* 2 */
	MOUNT_MFS,	/* 3 */
	MOUNT_MSDOS,	/* 4 */
	MOUNT_CD9660,	/* 5 = MOUNT_ISOFS */
	MOUNT_FDESC,	/* 6 */
	MOUNT_KERNFS,	/* 7 */
	NULL,		/* 8 = MOUNT_DEVFS */
	MOUNT_AFS,	/* 9 */
};

/* Number of slots in mountcompatnames[], including the NULL ones. */
const u_int nmountcompatnames = __arraycount(mountcompatnames);

/*
 * Filter event method for EVFILT_FS.
170 */ 171 static struct klist fs_klist; 172 static kmutex_t fs_klist_lock; 173 174 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 175 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 176 177 void 178 vfs_evfilt_fs_init(void) 179 { 180 klist_init(&fs_klist); 181 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE); 182 } 183 184 static int 185 filt_fsattach(struct knote *kn) 186 { 187 mutex_enter(&fs_klist_lock); 188 kn->kn_flags |= EV_CLEAR; 189 klist_insert(&fs_klist, kn); 190 mutex_exit(&fs_klist_lock); 191 192 return 0; 193 } 194 195 static void 196 filt_fsdetach(struct knote *kn) 197 { 198 mutex_enter(&fs_klist_lock); 199 klist_remove(&fs_klist, kn); 200 mutex_exit(&fs_klist_lock); 201 } 202 203 static int 204 filt_fs(struct knote *kn, long hint) 205 { 206 int rv; 207 208 if (hint & NOTE_SUBMIT) { 209 KASSERT(mutex_owned(&fs_klist_lock)); 210 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 211 } else { 212 mutex_enter(&fs_klist_lock); 213 } 214 215 rv = (kn->kn_fflags != 0); 216 217 if ((hint & NOTE_SUBMIT) == 0) { 218 mutex_exit(&fs_klist_lock); 219 } 220 221 return rv; 222 } 223 224 /* referenced in kern_event.c */ 225 const struct filterops fs_filtops = { 226 .f_flags = FILTEROP_MPSAFE, 227 .f_attach = filt_fsattach, 228 .f_detach = filt_fsdetach, 229 .f_event = filt_fs, 230 }; 231 232 static int 233 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 234 { 235 file_t *dfp; 236 int error; 237 238 if (fdat != AT_FDCWD) { 239 if ((error = fd_getvnode(fdat, &dfp)) != 0) 240 goto out; 241 242 NDAT(ndp, dfp->f_vnode); 243 } 244 245 error = namei(ndp); 246 247 if (fdat != AT_FDCWD) 248 fd_putfile(fdat); 249 out: 250 return error; 251 } 252 253 static int 254 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 255 namei_simple_flags_t sflags, struct vnode **vp_ret) 256 { 257 file_t *dfp; 258 struct vnode *dvp; 259 int error; 260 261 if (fdat != AT_FDCWD) { 262 if ((error = fd_getvnode(fdat, &dfp)) != 0) 263 goto out; 264 265 dvp = dfp->f_vnode; 266 } else { 267 dvp 
= NULL; 268 } 269 270 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 271 272 if (fdat != AT_FDCWD) 273 fd_putfile(fdat); 274 out: 275 return error; 276 } 277 278 static int 279 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 280 { 281 int error; 282 283 fp->f_flag = flags & FMASK; 284 fp->f_type = DTYPE_VNODE; 285 fp->f_ops = &vnops; 286 fp->f_vnode = vp; 287 288 if (flags & (O_EXLOCK | O_SHLOCK)) { 289 struct flock lf; 290 int type; 291 292 lf.l_whence = SEEK_SET; 293 lf.l_start = 0; 294 lf.l_len = 0; 295 if (flags & O_EXLOCK) 296 lf.l_type = F_WRLCK; 297 else 298 lf.l_type = F_RDLCK; 299 type = F_FLOCK; 300 if ((flags & FNONBLOCK) == 0) 301 type |= F_WAIT; 302 VOP_UNLOCK(vp); 303 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 304 if (error) { 305 (void) vn_close(vp, fp->f_flag, fp->f_cred); 306 fd_abort(l->l_proc, fp, indx); 307 return error; 308 } 309 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 310 atomic_or_uint(&fp->f_flag, FHASLOCK); 311 } 312 if (flags & O_CLOEXEC) 313 fd_set_exclose(l, indx, true); 314 return 0; 315 } 316 317 static int 318 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 319 void *data, size_t *data_len) 320 { 321 struct mount *mp; 322 int error = 0, saved_flags; 323 324 mp = vp->v_mount; 325 saved_flags = mp->mnt_flag; 326 327 /* We can operate only on VV_ROOT nodes. */ 328 if ((vp->v_vflag & VV_ROOT) == 0) { 329 error = EINVAL; 330 goto out; 331 } 332 333 /* 334 * We only allow the filesystem to be reloaded if it 335 * is currently mounted read-only. Additionally, we 336 * prevent read-write to read-only downgrades. 337 */ 338 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 339 (mp->mnt_flag & MNT_RDONLY) == 0 && 340 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 341 error = EOPNOTSUPP; /* Needs translation */ 342 goto out; 343 } 344 345 /* 346 * Enabling MNT_UNION requires a covered mountpoint and 347 * must not happen on the root mount. 
348 */ 349 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 350 error = EOPNOTSUPP; 351 goto out; 352 } 353 354 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 355 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 356 if (error) 357 goto out; 358 359 error = vfs_suspend(mp, 0); 360 if (error) 361 goto out; 362 363 mutex_enter(mp->mnt_updating); 364 365 mp->mnt_flag &= ~MNT_OP_FLAGS; 366 mp->mnt_flag |= flags & MNT_OP_FLAGS; 367 368 /* 369 * Set the mount level flags. 370 */ 371 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 372 if ((flags & MNT_RDONLY)) 373 mp->mnt_iflag |= IMNT_WANTRDONLY; 374 else 375 mp->mnt_iflag |= IMNT_WANTRDWR; 376 } 377 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 378 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 379 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 380 mp->mnt_flag &= ~MNT_RDONLY; 381 382 error = VFS_MOUNT(mp, path, data, data_len); 383 384 if (error && data != NULL) { 385 int error2; 386 387 /* 388 * Update failed; let's try and see if it was an 389 * export request. For compat with 3.0 and earlier. 390 */ 391 error2 = vfs_hooks_reexport(mp, path, data); 392 393 /* 394 * Only update error code if the export request was 395 * understood but some problem occurred while 396 * processing it. 
397 */ 398 if (error2 != EJUSTRETURN) 399 error = error2; 400 } 401 402 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 403 mp->mnt_flag |= MNT_RDONLY; 404 if (error) 405 mp->mnt_flag = saved_flags; 406 mp->mnt_flag &= ~MNT_OP_FLAGS; 407 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 408 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 409 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 410 vfs_syncer_add_to_worklist(mp); 411 } else { 412 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 413 vfs_syncer_remove_from_worklist(mp); 414 } 415 mutex_exit(mp->mnt_updating); 416 vfs_resume(mp); 417 418 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 419 (flags & MNT_EXTATTR)) { 420 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 421 NULL, 0, NULL) != 0) { 422 printf("%s: failed to start extattr, error = %d", 423 mp->mnt_stat.f_mntonname, error); 424 mp->mnt_flag &= ~MNT_EXTATTR; 425 } 426 } 427 428 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 429 !(flags & MNT_EXTATTR)) { 430 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 431 NULL, 0, NULL) != 0) { 432 printf("%s: failed to stop extattr, error = %d", 433 mp->mnt_stat.f_mntonname, error); 434 mp->mnt_flag |= MNT_RDONLY; 435 } 436 } 437 out: 438 return (error); 439 } 440 441 static int 442 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 443 struct vfsops **vfsops) 444 { 445 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 446 int error; 447 448 if (type_seg == UIO_USERSPACE) { 449 /* Copy file-system type from userspace. */ 450 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 451 } else { 452 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 453 KASSERT(error == 0); 454 } 455 456 if (error) { 457 /* 458 * Historically, filesystem types were identified by numbers. 459 * If we get an integer for the filesystem type instead of a 460 * string, we check to see if it matches one of the historic 461 * filesystem types. 
462 */ 463 u_long fsindex = (u_long)fstype; 464 if (fsindex >= nmountcompatnames || 465 mountcompatnames[fsindex] == NULL) 466 return ENODEV; 467 strlcpy(fstypename, mountcompatnames[fsindex], 468 sizeof(fstypename)); 469 } 470 471 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 472 if (strcmp(fstypename, "ufs") == 0) 473 fstypename[0] = 'f'; 474 475 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 476 return 0; 477 478 /* If we can autoload a vfs module, try again */ 479 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 480 481 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 482 return 0; 483 484 return ENODEV; 485 } 486 487 static int 488 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 489 void *data, size_t *data_len) 490 { 491 struct mount *mp; 492 int error; 493 494 /* If MNT_GETARGS is specified, it should be the only flag. */ 495 if (flags & ~MNT_GETARGS) 496 return EINVAL; 497 498 mp = vp->v_mount; 499 500 /* XXX: probably some notion of "can see" here if we want isolation. 
*/ 501 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 502 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 503 if (error) 504 return error; 505 506 if ((vp->v_vflag & VV_ROOT) == 0) 507 return EINVAL; 508 509 if (vfs_busy(mp)) 510 return EPERM; 511 512 mutex_enter(mp->mnt_updating); 513 mp->mnt_flag &= ~MNT_OP_FLAGS; 514 mp->mnt_flag |= MNT_GETARGS; 515 error = VFS_MOUNT(mp, path, data, data_len); 516 mp->mnt_flag &= ~MNT_OP_FLAGS; 517 mutex_exit(mp->mnt_updating); 518 519 vfs_unbusy(mp); 520 return (error); 521 } 522 523 int 524 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 525 { 526 /* { 527 syscallarg(const char *) type; 528 syscallarg(const char *) path; 529 syscallarg(int) flags; 530 syscallarg(void *) data; 531 syscallarg(size_t) data_len; 532 } */ 533 534 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, SCARG(uap, path), 535 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 536 SCARG(uap, data_len), retval); 537 } 538 539 int 540 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 541 const char *path, int flags, void *data, enum uio_seg data_seg, 542 size_t data_len, register_t *retval) 543 { 544 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 545 struct vnode *vp; 546 void *data_buf = data; 547 bool vfsopsrele = false; 548 size_t alloc_sz = 0; 549 int error; 550 551 /* 552 * Get vnode to be covered 553 */ 554 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 555 if (error != 0) { 556 vp = NULL; 557 goto done; 558 } 559 560 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 561 vfsops = vp->v_mount->mnt_op; 562 } else { 563 /* 'type' is userspace */ 564 error = mount_get_vfsops(type, type_seg, &vfsops); 565 if (error != 0) 566 goto done; 567 vfsopsrele = true; 568 } 569 570 /* 571 * We allow data to be NULL, even for userspace. Some fs's don't need 572 * it. The others will handle NULL. 
573 */ 574 if (data != NULL && data_seg == UIO_USERSPACE) { 575 if (data_len == 0) { 576 /* No length supplied, use default for filesystem */ 577 data_len = vfsops->vfs_min_mount_data; 578 579 /* 580 * Hopefully a longer buffer won't make copyin() fail. 581 * For compatibility with 3.0 and earlier. 582 */ 583 if (flags & MNT_UPDATE 584 && data_len < sizeof (struct mnt_export_args30)) 585 data_len = sizeof (struct mnt_export_args30); 586 } 587 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 588 error = EINVAL; 589 goto done; 590 } 591 alloc_sz = data_len; 592 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 593 594 /* NFS needs the buffer even for mnt_getargs .... */ 595 error = copyin(data, data_buf, data_len); 596 if (error != 0) 597 goto done; 598 } 599 600 if (flags & MNT_GETARGS) { 601 if (data_len == 0) { 602 error = EINVAL; 603 goto done; 604 } 605 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 606 if (error != 0) 607 goto done; 608 if (data_seg == UIO_USERSPACE) 609 error = copyout(data_buf, data, data_len); 610 *retval = data_len; 611 } else if (flags & MNT_UPDATE) { 612 error = mount_update(l, vp, path, flags, data_buf, &data_len); 613 } else { 614 /* Locking is handled internally in mount_domount(). */ 615 KASSERT(vfsopsrele == true); 616 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 617 &data_len); 618 vfsopsrele = false; 619 } 620 if (!error) { 621 mutex_enter(&fs_klist_lock); 622 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 623 mutex_exit(&fs_klist_lock); 624 } 625 626 done: 627 if (vfsopsrele) 628 vfs_delref(vfsops); 629 if (vp != NULL) { 630 vrele(vp); 631 } 632 if (data_buf != data) 633 kmem_free(data_buf, alloc_sz); 634 return (error); 635 } 636 637 /* 638 * Unmount a file system. 639 * 640 * Note: unmount takes a path to the vnode mounted on as argument, 641 * not special file (as before). 
642 */ 643 /* ARGSUSED */ 644 int 645 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 646 { 647 /* { 648 syscallarg(const char *) path; 649 syscallarg(int) flags; 650 } */ 651 struct vnode *vp; 652 struct mount *mp; 653 int error; 654 struct pathbuf *pb; 655 struct nameidata nd; 656 657 error = pathbuf_copyin(SCARG(uap, path), &pb); 658 if (error) { 659 return error; 660 } 661 662 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 663 if ((error = namei(&nd)) != 0) { 664 pathbuf_destroy(pb); 665 return error; 666 } 667 vp = nd.ni_vp; 668 pathbuf_destroy(pb); 669 670 mp = vp->v_mount; 671 vfs_ref(mp); 672 VOP_UNLOCK(vp); 673 674 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 675 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 676 if (error) { 677 vrele(vp); 678 vfs_rele(mp); 679 return (error); 680 } 681 682 /* 683 * Don't allow unmounting the root file system. 684 */ 685 if (mp->mnt_flag & MNT_ROOTFS) { 686 vrele(vp); 687 vfs_rele(mp); 688 return (EINVAL); 689 } 690 691 /* 692 * Must be the root of the filesystem 693 */ 694 if ((vp->v_vflag & VV_ROOT) == 0) { 695 vrele(vp); 696 vfs_rele(mp); 697 return (EINVAL); 698 } 699 700 vrele(vp); 701 error = dounmount(mp, SCARG(uap, flags), l); 702 vfs_rele(mp); 703 if (!error) { 704 mutex_enter(&fs_klist_lock); 705 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 706 mutex_exit(&fs_klist_lock); 707 } 708 return error; 709 } 710 711 /* 712 * Sync each mounted filesystem. 
713 */ 714 #ifdef DEBUG 715 int syncprt = 0; 716 struct ctldebug debug0 = { "syncprt", &syncprt }; 717 #endif 718 719 void 720 do_sys_sync(struct lwp *l) 721 { 722 mount_iterator_t *iter; 723 struct mount *mp; 724 int asyncflag; 725 726 mountlist_iterator_init(&iter); 727 while ((mp = mountlist_iterator_next(iter)) != NULL) { 728 mutex_enter(mp->mnt_updating); 729 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 730 asyncflag = mp->mnt_flag & MNT_ASYNC; 731 mp->mnt_flag &= ~MNT_ASYNC; 732 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 733 if (asyncflag) 734 mp->mnt_flag |= MNT_ASYNC; 735 } 736 mutex_exit(mp->mnt_updating); 737 } 738 mountlist_iterator_destroy(iter); 739 #ifdef DEBUG 740 if (syncprt) 741 vfs_bufstats(); 742 #endif /* DEBUG */ 743 } 744 745 static bool 746 sync_vnode_filter(void *cookie, vnode_t *vp) 747 { 748 749 if (vp->v_numoutput > 0) { 750 ++*(int *)cookie; 751 } 752 return false; 753 } 754 755 int 756 vfs_syncwait(void) 757 { 758 int nbusy, nbusy_prev, iter; 759 struct vnode_iterator *vniter; 760 mount_iterator_t *mpiter; 761 struct mount *mp; 762 763 for (nbusy_prev = 0, iter = 0; iter < 20;) { 764 nbusy = 0; 765 mountlist_iterator_init(&mpiter); 766 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 767 vnode_t *vp __diagused; 768 vfs_vnode_iterator_init(mp, &vniter); 769 vp = vfs_vnode_iterator_next(vniter, 770 sync_vnode_filter, &nbusy); 771 KASSERT(vp == NULL); 772 vfs_vnode_iterator_destroy(vniter); 773 } 774 mountlist_iterator_destroy(mpiter); 775 776 if (nbusy == 0) 777 break; 778 if (nbusy_prev == 0) 779 nbusy_prev = nbusy; 780 printf("%d ", nbusy); 781 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 782 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 783 iter++; 784 else 785 nbusy_prev = nbusy; 786 } 787 788 if (nbusy) { 789 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 790 printf("giving up\nPrinting vnodes for busy buffers\n"); 791 mountlist_iterator_init(&mpiter); 792 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 
793 vnode_t *vp; 794 vfs_vnode_iterator_init(mp, &vniter); 795 vp = vfs_vnode_iterator_next(vniter, 796 NULL, NULL); 797 mutex_enter(vp->v_interlock); 798 if (vp->v_numoutput > 0) 799 vprint(NULL, vp); 800 mutex_exit(vp->v_interlock); 801 vrele(vp); 802 vfs_vnode_iterator_destroy(vniter); 803 } 804 mountlist_iterator_destroy(mpiter); 805 #endif 806 } 807 808 return nbusy; 809 } 810 811 /* ARGSUSED */ 812 int 813 sys_sync(struct lwp *l, const void *v, register_t *retval) 814 { 815 do_sys_sync(l); 816 return (0); 817 } 818 819 820 /* 821 * Access or change filesystem quotas. 822 * 823 * (this is really 14 different calls bundled into one) 824 */ 825 826 static int 827 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 828 { 829 struct quotastat info_k; 830 int error; 831 832 /* ensure any padding bytes are cleared */ 833 memset(&info_k, 0, sizeof(info_k)); 834 835 error = vfs_quotactl_stat(mp, &info_k); 836 if (error) { 837 return error; 838 } 839 840 return copyout(&info_k, info_u, sizeof(info_k)); 841 } 842 843 static int 844 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 845 struct quotaidtypestat *info_u) 846 { 847 struct quotaidtypestat info_k; 848 int error; 849 850 /* ensure any padding bytes are cleared */ 851 memset(&info_k, 0, sizeof(info_k)); 852 853 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 854 if (error) { 855 return error; 856 } 857 858 return copyout(&info_k, info_u, sizeof(info_k)); 859 } 860 861 static int 862 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 863 struct quotaobjtypestat *info_u) 864 { 865 struct quotaobjtypestat info_k; 866 int error; 867 868 /* ensure any padding bytes are cleared */ 869 memset(&info_k, 0, sizeof(info_k)); 870 871 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 872 if (error) { 873 return error; 874 } 875 876 return copyout(&info_k, info_u, sizeof(info_k)); 877 } 878 879 static int 880 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 881 struct 
quotaval *val_u) 882 { 883 struct quotakey key_k; 884 struct quotaval val_k; 885 int error; 886 887 /* ensure any padding bytes are cleared */ 888 memset(&val_k, 0, sizeof(val_k)); 889 890 error = copyin(key_u, &key_k, sizeof(key_k)); 891 if (error) { 892 return error; 893 } 894 895 error = vfs_quotactl_get(mp, &key_k, &val_k); 896 if (error) { 897 return error; 898 } 899 900 return copyout(&val_k, val_u, sizeof(val_k)); 901 } 902 903 static int 904 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 905 const struct quotaval *val_u) 906 { 907 struct quotakey key_k; 908 struct quotaval val_k; 909 int error; 910 911 error = copyin(key_u, &key_k, sizeof(key_k)); 912 if (error) { 913 return error; 914 } 915 916 error = copyin(val_u, &val_k, sizeof(val_k)); 917 if (error) { 918 return error; 919 } 920 921 return vfs_quotactl_put(mp, &key_k, &val_k); 922 } 923 924 static int 925 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 926 { 927 struct quotakey key_k; 928 int error; 929 930 error = copyin(key_u, &key_k, sizeof(key_k)); 931 if (error) { 932 return error; 933 } 934 935 return vfs_quotactl_del(mp, &key_k); 936 } 937 938 static int 939 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 940 { 941 struct quotakcursor cursor_k; 942 int error; 943 944 /* ensure any padding bytes are cleared */ 945 memset(&cursor_k, 0, sizeof(cursor_k)); 946 947 error = vfs_quotactl_cursoropen(mp, &cursor_k); 948 if (error) { 949 return error; 950 } 951 952 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 953 } 954 955 static int 956 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 957 { 958 struct quotakcursor cursor_k; 959 int error; 960 961 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 962 if (error) { 963 return error; 964 } 965 966 return vfs_quotactl_cursorclose(mp, &cursor_k); 967 } 968 969 static int 970 do_sys_quotactl_cursorskipidtype(struct mount *mp, 971 struct quotakcursor 
*cursor_u, int idtype) 972 { 973 struct quotakcursor cursor_k; 974 int error; 975 976 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 977 if (error) { 978 return error; 979 } 980 981 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 982 if (error) { 983 return error; 984 } 985 986 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 987 } 988 989 static int 990 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 991 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 992 unsigned *ret_u) 993 { 994 #define CGET_STACK_MAX 8 995 struct quotakcursor cursor_k; 996 struct quotakey stackkeys[CGET_STACK_MAX]; 997 struct quotaval stackvals[CGET_STACK_MAX]; 998 struct quotakey *keys_k; 999 struct quotaval *vals_k; 1000 unsigned ret_k; 1001 int error; 1002 1003 if (maxnum > 128) { 1004 maxnum = 128; 1005 } 1006 1007 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1008 if (error) { 1009 return error; 1010 } 1011 1012 if (maxnum <= CGET_STACK_MAX) { 1013 keys_k = stackkeys; 1014 vals_k = stackvals; 1015 /* ensure any padding bytes are cleared */ 1016 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1017 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1018 } else { 1019 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1020 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1021 } 1022 1023 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1024 &ret_k); 1025 if (error) { 1026 goto fail; 1027 } 1028 1029 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1030 if (error) { 1031 goto fail; 1032 } 1033 1034 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1035 if (error) { 1036 goto fail; 1037 } 1038 1039 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1040 if (error) { 1041 goto fail; 1042 } 1043 1044 /* do last to maximize the chance of being able to recover a failure */ 1045 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1046 1047 fail: 1048 if (keys_k 
!= stackkeys) { 1049 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1050 } 1051 if (vals_k != stackvals) { 1052 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1053 } 1054 return error; 1055 } 1056 1057 static int 1058 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1059 int *ret_u) 1060 { 1061 struct quotakcursor cursor_k; 1062 int ret_k; 1063 int error; 1064 1065 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1066 if (error) { 1067 return error; 1068 } 1069 1070 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1071 if (error) { 1072 return error; 1073 } 1074 1075 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1076 if (error) { 1077 return error; 1078 } 1079 1080 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1081 } 1082 1083 static int 1084 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1085 { 1086 struct quotakcursor cursor_k; 1087 int error; 1088 1089 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1090 if (error) { 1091 return error; 1092 } 1093 1094 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1095 if (error) { 1096 return error; 1097 } 1098 1099 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1100 } 1101 1102 static int 1103 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1104 { 1105 char *path_k; 1106 int error; 1107 1108 /* XXX this should probably be a struct pathbuf */ 1109 path_k = PNBUF_GET(); 1110 error = copyin(path_u, path_k, PATH_MAX); 1111 if (error) { 1112 PNBUF_PUT(path_k); 1113 return error; 1114 } 1115 1116 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1117 1118 PNBUF_PUT(path_k); 1119 return error; 1120 } 1121 1122 static int 1123 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1124 { 1125 return vfs_quotactl_quotaoff(mp, idtype); 1126 } 1127 1128 int 1129 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1130 { 1131 struct mount *mp; 1132 struct vnode *vp; 1133 int error; 1134 1135 
error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1136 if (error != 0) 1137 return (error); 1138 mp = vp->v_mount; 1139 1140 switch (args->qc_op) { 1141 case QUOTACTL_STAT: 1142 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1143 break; 1144 case QUOTACTL_IDTYPESTAT: 1145 error = do_sys_quotactl_idtypestat(mp, 1146 args->u.idtypestat.qc_idtype, 1147 args->u.idtypestat.qc_info); 1148 break; 1149 case QUOTACTL_OBJTYPESTAT: 1150 error = do_sys_quotactl_objtypestat(mp, 1151 args->u.objtypestat.qc_objtype, 1152 args->u.objtypestat.qc_info); 1153 break; 1154 case QUOTACTL_GET: 1155 error = do_sys_quotactl_get(mp, 1156 args->u.get.qc_key, 1157 args->u.get.qc_val); 1158 break; 1159 case QUOTACTL_PUT: 1160 error = do_sys_quotactl_put(mp, 1161 args->u.put.qc_key, 1162 args->u.put.qc_val); 1163 break; 1164 case QUOTACTL_DEL: 1165 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1166 break; 1167 case QUOTACTL_CURSOROPEN: 1168 error = do_sys_quotactl_cursoropen(mp, 1169 args->u.cursoropen.qc_cursor); 1170 break; 1171 case QUOTACTL_CURSORCLOSE: 1172 error = do_sys_quotactl_cursorclose(mp, 1173 args->u.cursorclose.qc_cursor); 1174 break; 1175 case QUOTACTL_CURSORSKIPIDTYPE: 1176 error = do_sys_quotactl_cursorskipidtype(mp, 1177 args->u.cursorskipidtype.qc_cursor, 1178 args->u.cursorskipidtype.qc_idtype); 1179 break; 1180 case QUOTACTL_CURSORGET: 1181 error = do_sys_quotactl_cursorget(mp, 1182 args->u.cursorget.qc_cursor, 1183 args->u.cursorget.qc_keys, 1184 args->u.cursorget.qc_vals, 1185 args->u.cursorget.qc_maxnum, 1186 args->u.cursorget.qc_ret); 1187 break; 1188 case QUOTACTL_CURSORATEND: 1189 error = do_sys_quotactl_cursoratend(mp, 1190 args->u.cursoratend.qc_cursor, 1191 args->u.cursoratend.qc_ret); 1192 break; 1193 case QUOTACTL_CURSORREWIND: 1194 error = do_sys_quotactl_cursorrewind(mp, 1195 args->u.cursorrewind.qc_cursor); 1196 break; 1197 case QUOTACTL_QUOTAON: 1198 error = do_sys_quotactl_quotaon(mp, 1199 args->u.quotaon.qc_idtype, 1200 
args->u.quotaon.qc_quotafile); 1201 break; 1202 case QUOTACTL_QUOTAOFF: 1203 error = do_sys_quotactl_quotaoff(mp, 1204 args->u.quotaoff.qc_idtype); 1205 break; 1206 default: 1207 error = EINVAL; 1208 break; 1209 } 1210 1211 vrele(vp); 1212 return error; 1213 } 1214 1215 /* ARGSUSED */ 1216 int 1217 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1218 register_t *retval) 1219 { 1220 /* { 1221 syscallarg(const char *) path; 1222 syscallarg(struct quotactl_args *) args; 1223 } */ 1224 struct quotactl_args args; 1225 int error; 1226 1227 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1228 if (error) { 1229 return error; 1230 } 1231 1232 return do_sys_quotactl(SCARG(uap, path), &args); 1233 } 1234 1235 int 1236 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1237 int root) 1238 { 1239 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1240 bool chrooted; 1241 int error = 0; 1242 1243 KASSERT(l == curlwp); 1244 1245 /* 1246 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1247 * since it would imply chroots can be escaped. Just make sure this 1248 * routine is self-consistent. 1249 */ 1250 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1251 1252 /* 1253 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1254 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1255 * overrides MNT_NOWAIT. 
1256 */ 1257 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1258 (flags != MNT_WAIT && flags != 0)) { 1259 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1260 } else { 1261 /* Get the filesystem stats now */ 1262 memset(sp, 0, sizeof(*sp)); 1263 if ((error = VFS_STATVFS(mp, sp)) != 0) 1264 return error; 1265 if (!chrooted) 1266 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1267 } 1268 1269 if (chrooted) { 1270 size_t len; 1271 char *bp; 1272 char c; 1273 char *path = PNBUF_GET(); 1274 1275 bp = path + MAXPATHLEN; 1276 *--bp = '\0'; 1277 rw_enter(&cwdi->cwdi_lock, RW_READER); 1278 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1279 MAXPATHLEN / 2, 0, l); 1280 rw_exit(&cwdi->cwdi_lock); 1281 if (error) { 1282 PNBUF_PUT(path); 1283 return error; 1284 } 1285 len = strlen(bp); 1286 if (len != 1) { 1287 /* 1288 * for mount points that are below our root, we can see 1289 * them, so we fix up the pathname and return them. The 1290 * rest we cannot see, so we don't allow viewing the 1291 * data. 1292 */ 1293 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1294 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1295 (void)strlcpy(sp->f_mntonname, 1296 c == '\0' ? "/" : &sp->f_mntonname[len], 1297 sizeof(sp->f_mntonname)); 1298 } else { 1299 if (root) 1300 (void)strlcpy(sp->f_mntonname, "/", 1301 sizeof(sp->f_mntonname)); 1302 else 1303 error = EPERM; 1304 } 1305 } 1306 PNBUF_PUT(path); 1307 } 1308 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1309 return error; 1310 } 1311 1312 /* 1313 * Get filesystem statistics by path. 
1314 */ 1315 int 1316 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1317 { 1318 struct mount *mp; 1319 int error; 1320 struct vnode *vp; 1321 1322 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1323 if (error != 0) 1324 return error; 1325 mp = vp->v_mount; 1326 error = dostatvfs(mp, sb, l, flags, 1); 1327 vrele(vp); 1328 return error; 1329 } 1330 1331 /* ARGSUSED */ 1332 int 1333 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, register_t *retval) 1334 { 1335 /* { 1336 syscallarg(const char *) path; 1337 syscallarg(struct statvfs *) buf; 1338 syscallarg(int) flags; 1339 } */ 1340 struct statvfs *sb; 1341 int error; 1342 1343 sb = STATVFSBUF_GET(); 1344 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1345 if (error == 0) 1346 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1347 STATVFSBUF_PUT(sb); 1348 return error; 1349 } 1350 1351 /* 1352 * Get filesystem statistics by fd. 1353 */ 1354 int 1355 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1356 { 1357 file_t *fp; 1358 struct mount *mp; 1359 int error; 1360 1361 /* fd_getvnode() will use the descriptor for us */ 1362 if ((error = fd_getvnode(fd, &fp)) != 0) 1363 return (error); 1364 mp = fp->f_vnode->v_mount; 1365 error = dostatvfs(mp, sb, curlwp, flags, 1); 1366 fd_putfile(fd); 1367 return error; 1368 } 1369 1370 /* ARGSUSED */ 1371 int 1372 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, register_t *retval) 1373 { 1374 /* { 1375 syscallarg(int) fd; 1376 syscallarg(struct statvfs *) buf; 1377 syscallarg(int) flags; 1378 } */ 1379 struct statvfs *sb; 1380 int error; 1381 1382 sb = STATVFSBUF_GET(); 1383 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1384 if (error == 0) 1385 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1386 STATVFSBUF_PUT(sb); 1387 return error; 1388 } 1389 1390 1391 /* 1392 * Get statistics on all filesystems. 
1393 */ 1394 int 1395 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1396 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1397 register_t *retval) 1398 { 1399 int root = 0; 1400 mount_iterator_t *iter; 1401 struct proc *p = l->l_proc; 1402 struct mount *mp; 1403 struct statvfs *sb; 1404 size_t count, maxcount; 1405 int error = 0; 1406 1407 sb = STATVFSBUF_GET(); 1408 maxcount = bufsize / entry_sz; 1409 count = 0; 1410 mountlist_iterator_init(&iter); 1411 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1412 if (sfsp && count < maxcount) { 1413 error = dostatvfs(mp, sb, l, flags, 0); 1414 if (error) { 1415 error = 0; 1416 continue; 1417 } 1418 error = copyfn(sb, sfsp, entry_sz); 1419 if (error) 1420 goto out; 1421 sfsp = (char *)sfsp + entry_sz; 1422 root |= strcmp(sb->f_mntonname, "/") == 0; 1423 } 1424 count++; 1425 } 1426 1427 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1428 /* 1429 * fake a root entry 1430 */ 1431 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1432 sb, l, flags, 1); 1433 if (error != 0) 1434 goto out; 1435 if (sfsp) { 1436 error = copyfn(sb, sfsp, entry_sz); 1437 if (error != 0) 1438 goto out; 1439 } 1440 count++; 1441 } 1442 if (sfsp && count > maxcount) 1443 *retval = maxcount; 1444 else 1445 *retval = count; 1446 out: 1447 mountlist_iterator_destroy(iter); 1448 STATVFSBUF_PUT(sb); 1449 return error; 1450 } 1451 1452 int 1453 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1454 register_t *retval) 1455 { 1456 /* { 1457 syscallarg(struct statvfs *) buf; 1458 syscallarg(size_t) bufsize; 1459 syscallarg(int) flags; 1460 } */ 1461 1462 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1463 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1464 } 1465 1466 /* 1467 * Change current working directory to a given file descriptor. 
1468 */ 1469 int 1470 do_sys_fchdir(struct lwp *l, int fd, register_t *retval) 1471 { 1472 struct proc *p = l->l_proc; 1473 struct cwdinfo *cwdi; 1474 struct vnode *vp, *tdp; 1475 struct mount *mp; 1476 file_t *fp; 1477 int error; 1478 1479 /* fd_getvnode() will use the descriptor for us */ 1480 if ((error = fd_getvnode(fd, &fp)) != 0) 1481 return error; 1482 vp = fp->f_vnode; 1483 1484 vref(vp); 1485 vn_lock(vp, LK_SHARED | LK_RETRY); 1486 if (vp->v_type != VDIR) 1487 error = ENOTDIR; 1488 else 1489 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1490 if (error) { 1491 vput(vp); 1492 goto out; 1493 } 1494 while ((mp = vp->v_mountedhere) != NULL) { 1495 error = vfs_busy(mp); 1496 vput(vp); 1497 if (error != 0) 1498 goto out; 1499 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1500 vfs_unbusy(mp); 1501 if (error) 1502 goto out; 1503 vp = tdp; 1504 } 1505 VOP_UNLOCK(vp); 1506 1507 /* 1508 * Disallow changing to a directory not under the process's 1509 * current root directory (if there is one). 1510 */ 1511 cwdi = p->p_cwdi; 1512 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1513 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1514 vrele(vp); 1515 error = EPERM; /* operation not permitted */ 1516 } else { 1517 vrele(cwdi->cwdi_cdir); 1518 cwdi->cwdi_cdir = vp; 1519 } 1520 rw_exit(&cwdi->cwdi_lock); 1521 1522 out: 1523 fd_putfile(fd); 1524 return error; 1525 } 1526 1527 /* 1528 * Change current working directory to a given file descriptor. 1529 */ 1530 /* ARGSUSED */ 1531 int 1532 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1533 { 1534 /* { 1535 syscallarg(int) fd; 1536 } */ 1537 return do_sys_fchdir(l, SCARG(uap, fd), retval); 1538 } 1539 1540 /* 1541 * Change this process's notion of the root directory to a given file 1542 * descriptor. 
1543 */ 1544 int 1545 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1546 { 1547 struct vnode *vp; 1548 file_t *fp; 1549 int error, fd = SCARG(uap, fd); 1550 1551 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1552 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1553 return error; 1554 /* fd_getvnode() will use the descriptor for us */ 1555 if ((error = fd_getvnode(fd, &fp)) != 0) 1556 return error; 1557 vp = fp->f_vnode; 1558 vn_lock(vp, LK_SHARED | LK_RETRY); 1559 if (vp->v_type != VDIR) 1560 error = ENOTDIR; 1561 else 1562 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1563 VOP_UNLOCK(vp); 1564 if (error) 1565 goto out; 1566 vref(vp); 1567 change_root(vp); 1568 1569 out: 1570 fd_putfile(fd); 1571 return (error); 1572 } 1573 1574 /* 1575 * Change current working directory (``.''). 1576 */ 1577 int 1578 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg, 1579 register_t *retval) 1580 { 1581 struct proc *p = l->l_proc; 1582 struct cwdinfo * cwdi; 1583 int error; 1584 struct vnode *vp; 1585 1586 if ((error = chdir_lookup(path, seg, &vp, l)) != 0) 1587 return error; 1588 cwdi = p->p_cwdi; 1589 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1590 vrele(cwdi->cwdi_cdir); 1591 cwdi->cwdi_cdir = vp; 1592 rw_exit(&cwdi->cwdi_lock); 1593 return 0; 1594 } 1595 1596 /* 1597 * Change current working directory (``.''). 1598 */ 1599 /* ARGSUSED */ 1600 int 1601 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1602 { 1603 /* { 1604 syscallarg(const char *) path; 1605 } */ 1606 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval); 1607 } 1608 1609 /* 1610 * Change notion of root (``/'') directory. 
1611 */ 1612 /* ARGSUSED */ 1613 int 1614 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1615 { 1616 /* { 1617 syscallarg(const char *) path; 1618 } */ 1619 int error; 1620 struct vnode *vp; 1621 1622 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1623 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1624 return (error); 1625 1626 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1627 if (error == 0) 1628 change_root(vp); 1629 return error; 1630 } 1631 1632 /* 1633 * Common routine for chroot and fchroot. 1634 * NB: callers need to properly authorize the change root operation. 1635 */ 1636 void 1637 change_root(struct vnode *vp) 1638 { 1639 kauth_cred_t ncred; 1640 struct lwp *l = curlwp; 1641 struct proc *p = l->l_proc; 1642 struct cwdinfo *cwdi = p->p_cwdi; 1643 1644 ncred = kauth_cred_alloc(); 1645 1646 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1647 if (cwdi->cwdi_rdir != NULL) 1648 vrele(cwdi->cwdi_rdir); 1649 cwdi->cwdi_rdir = vp; 1650 1651 /* 1652 * Prevent escaping from chroot by putting the root under 1653 * the working directory. Silently chdir to / if we aren't 1654 * already there. 1655 */ 1656 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1657 /* 1658 * XXX would be more failsafe to change directory to a 1659 * deadfs node here instead 1660 */ 1661 vrele(cwdi->cwdi_cdir); 1662 vref(vp); 1663 cwdi->cwdi_cdir = vp; 1664 } 1665 rw_exit(&cwdi->cwdi_lock); 1666 1667 /* Get a write lock on the process credential. */ 1668 proc_crmod_enter(); 1669 1670 kauth_cred_clone(p->p_cred, ncred); 1671 kauth_proc_chroot(ncred, p->p_cwdi); 1672 1673 /* Broadcast our credentials to the process and other LWPs. */ 1674 proc_crmod_leave(ncred, p->p_cred, true); 1675 } 1676 1677 /* 1678 * Common routine for chroot and chdir. 
1679 * XXX "where" should be enum uio_seg 1680 */ 1681 int 1682 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1683 { 1684 struct pathbuf *pb; 1685 struct nameidata nd; 1686 int error; 1687 1688 error = pathbuf_maybe_copyin(path, where, &pb); 1689 if (error) { 1690 return error; 1691 } 1692 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1693 if ((error = namei(&nd)) != 0) { 1694 pathbuf_destroy(pb); 1695 return error; 1696 } 1697 *vpp = nd.ni_vp; 1698 pathbuf_destroy(pb); 1699 1700 if ((*vpp)->v_type != VDIR) 1701 error = ENOTDIR; 1702 else 1703 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1704 1705 if (error) 1706 vput(*vpp); 1707 else 1708 VOP_UNLOCK(*vpp); 1709 return (error); 1710 } 1711 1712 /* 1713 * Internals of sys_open - path has already been converted into a pathbuf 1714 * (so we can easily reuse this function from other parts of the kernel, 1715 * like posix_spawn post-processing). 1716 */ 1717 int 1718 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1719 int open_mode, int *fd) 1720 { 1721 struct proc *p = l->l_proc; 1722 struct cwdinfo *cwdi = p->p_cwdi; 1723 file_t *fp; 1724 struct vnode *vp; 1725 int dupfd; 1726 bool dupfd_move; 1727 int flags, cmode; 1728 int indx, error; 1729 1730 if (open_flags & O_SEARCH) { 1731 open_flags &= ~(int)O_SEARCH; 1732 } 1733 1734 /* 1735 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1736 * may be specified. 1737 */ 1738 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1739 return EINVAL; 1740 1741 flags = FFLAGS(open_flags); 1742 if ((flags & (FREAD | FWRITE)) == 0) 1743 return EINVAL; 1744 1745 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1746 return error; 1747 } 1748 1749 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1750 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1751 1752 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1753 &vp, &dupfd_move, &dupfd); 1754 if (error != 0) { 1755 fd_abort(p, fp, indx); 1756 return error; 1757 } 1758 1759 if (vp == NULL) { 1760 fd_abort(p, fp, indx); 1761 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1762 if (error) 1763 return error; 1764 *fd = indx; 1765 } else { 1766 error = open_setfp(l, fp, vp, indx, flags); 1767 if (error) 1768 return error; 1769 VOP_UNLOCK(vp); 1770 *fd = indx; 1771 fd_affix(p, fp, indx); 1772 } 1773 1774 return 0; 1775 } 1776 1777 int 1778 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1779 { 1780 struct pathbuf *pb; 1781 int error, oflags; 1782 1783 oflags = FFLAGS(open_flags); 1784 if ((oflags & (FREAD | FWRITE)) == 0) 1785 return EINVAL; 1786 1787 pb = pathbuf_create(path); 1788 if (pb == NULL) 1789 return ENOMEM; 1790 1791 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1792 pathbuf_destroy(pb); 1793 1794 return error; 1795 } 1796 1797 static int 1798 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1799 int mode, int *fd) 1800 { 1801 file_t *dfp = NULL; 1802 struct vnode *dvp = NULL; 1803 struct pathbuf *pb; 1804 const char *pathstring = NULL; 1805 int error; 1806 1807 if (path == NULL) { 1808 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1809 if (error == ENOSYS) 1810 goto no_compat; 1811 if (error) 1812 return error; 1813 } else { 1814 no_compat: 1815 error = pathbuf_copyin(path, &pb); 1816 if (error) 1817 return error; 1818 } 1819 1820 pathstring = pathbuf_stringcopy_get(pb); 1821 1822 /* 1823 * fdat is ignored if: 1824 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1825 * 2) if path is absolute, then fdat is useless. 
1826 */ 1827 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1828 /* fd_getvnode() will use the descriptor for us */ 1829 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1830 goto out; 1831 1832 dvp = dfp->f_vnode; 1833 } 1834 1835 error = do_open(l, dvp, pb, flags, mode, fd); 1836 1837 if (dfp != NULL) 1838 fd_putfile(fdat); 1839 out: 1840 pathbuf_stringcopy_put(pb, pathstring); 1841 pathbuf_destroy(pb); 1842 return error; 1843 } 1844 1845 int 1846 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1847 { 1848 /* { 1849 syscallarg(const char *) path; 1850 syscallarg(int) flags; 1851 syscallarg(int) mode; 1852 } */ 1853 int error; 1854 int fd; 1855 1856 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1857 SCARG(uap, flags), SCARG(uap, mode), &fd); 1858 1859 if (error == 0) 1860 *retval = fd; 1861 1862 return error; 1863 } 1864 1865 int 1866 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1867 { 1868 /* { 1869 syscallarg(int) fd; 1870 syscallarg(const char *) path; 1871 syscallarg(int) oflags; 1872 syscallarg(int) mode; 1873 } */ 1874 int error; 1875 int fd; 1876 1877 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1878 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1879 1880 if (error == 0) 1881 *retval = fd; 1882 1883 return error; 1884 } 1885 1886 static void 1887 vfs__fhfree(fhandle_t *fhp) 1888 { 1889 size_t fhsize; 1890 1891 fhsize = FHANDLE_SIZE(fhp); 1892 kmem_free(fhp, fhsize); 1893 } 1894 1895 /* 1896 * vfs_composefh: compose a filehandle. 
1897 */ 1898 1899 int 1900 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1901 { 1902 struct mount *mp; 1903 struct fid *fidp; 1904 int error; 1905 size_t needfhsize; 1906 size_t fidsize; 1907 1908 mp = vp->v_mount; 1909 fidp = NULL; 1910 if (*fh_size < FHANDLE_SIZE_MIN) { 1911 fidsize = 0; 1912 } else { 1913 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1914 if (fhp != NULL) { 1915 memset(fhp, 0, *fh_size); 1916 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1917 fidp = &fhp->fh_fid; 1918 } 1919 } 1920 error = VFS_VPTOFH(vp, fidp, &fidsize); 1921 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1922 if (error == 0 && *fh_size < needfhsize) { 1923 error = E2BIG; 1924 } 1925 *fh_size = needfhsize; 1926 return error; 1927 } 1928 1929 int 1930 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1931 { 1932 struct mount *mp; 1933 fhandle_t *fhp; 1934 size_t fhsize; 1935 size_t fidsize; 1936 int error; 1937 1938 mp = vp->v_mount; 1939 fidsize = 0; 1940 error = VFS_VPTOFH(vp, NULL, &fidsize); 1941 KASSERT(error != 0); 1942 if (error != E2BIG) { 1943 goto out; 1944 } 1945 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1946 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1947 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1948 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1949 if (error == 0) { 1950 KASSERT(FHANDLE_SIZE(fhp) == fhsize); 1951 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize); 1952 *fhpp = fhp; 1953 } else { 1954 kmem_free(fhp, fhsize); 1955 } 1956 out: 1957 return error; 1958 } 1959 1960 void 1961 vfs_composefh_free(fhandle_t *fhp) 1962 { 1963 1964 vfs__fhfree(fhp); 1965 } 1966 1967 /* 1968 * vfs_fhtovp: lookup a vnode by a filehandle. 
1969 */ 1970 1971 int 1972 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1973 { 1974 struct mount *mp; 1975 int error; 1976 1977 *vpp = NULL; 1978 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1979 if (mp == NULL) { 1980 error = ESTALE; 1981 goto out; 1982 } 1983 if (mp->mnt_op->vfs_fhtovp == NULL) { 1984 error = EOPNOTSUPP; 1985 goto out; 1986 } 1987 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 1988 out: 1989 return error; 1990 } 1991 1992 /* 1993 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1994 * the needed size. 1995 */ 1996 1997 int 1998 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1999 { 2000 fhandle_t *fhp; 2001 int error; 2002 2003 if (fhsize > FHANDLE_SIZE_MAX) { 2004 return EINVAL; 2005 } 2006 if (fhsize < FHANDLE_SIZE_MIN) { 2007 return EINVAL; 2008 } 2009 again: 2010 fhp = kmem_alloc(fhsize, KM_SLEEP); 2011 error = copyin(ufhp, fhp, fhsize); 2012 if (error == 0) { 2013 /* XXX this check shouldn't be here */ 2014 if (FHANDLE_SIZE(fhp) == fhsize) { 2015 *fhpp = fhp; 2016 return 0; 2017 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 2018 /* 2019 * a kludge for nfsv2 padded handles. 2020 */ 2021 size_t sz; 2022 2023 sz = FHANDLE_SIZE(fhp); 2024 kmem_free(fhp, fhsize); 2025 fhsize = sz; 2026 goto again; 2027 } else { 2028 /* 2029 * userland told us wrong size. 
2030 */ 2031 error = EINVAL; 2032 } 2033 } 2034 kmem_free(fhp, fhsize); 2035 return error; 2036 } 2037 2038 void 2039 vfs_copyinfh_free(fhandle_t *fhp) 2040 { 2041 2042 vfs__fhfree(fhp); 2043 } 2044 2045 /* 2046 * Get file handle system call 2047 */ 2048 int 2049 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 2050 { 2051 /* { 2052 syscallarg(char *) fname; 2053 syscallarg(fhandle_t *) fhp; 2054 syscallarg(size_t *) fh_size; 2055 } */ 2056 struct vnode *vp; 2057 fhandle_t *fh; 2058 int error; 2059 struct pathbuf *pb; 2060 struct nameidata nd; 2061 size_t sz; 2062 size_t usz; 2063 2064 /* 2065 * Must be super user 2066 */ 2067 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2068 0, NULL, NULL, NULL); 2069 if (error) 2070 return (error); 2071 2072 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2073 if (error) { 2074 return error; 2075 } 2076 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2077 error = namei(&nd); 2078 if (error) { 2079 pathbuf_destroy(pb); 2080 return error; 2081 } 2082 vp = nd.ni_vp; 2083 pathbuf_destroy(pb); 2084 2085 error = vfs_composefh_alloc(vp, &fh); 2086 vput(vp); 2087 if (error != 0) { 2088 return error; 2089 } 2090 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2091 if (error != 0) { 2092 goto out; 2093 } 2094 sz = FHANDLE_SIZE(fh); 2095 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2096 if (error != 0) { 2097 goto out; 2098 } 2099 if (usz >= sz) { 2100 error = copyout(fh, SCARG(uap, fhp), sz); 2101 } else { 2102 error = E2BIG; 2103 } 2104 out: 2105 vfs_composefh_free(fh); 2106 return (error); 2107 } 2108 2109 /* 2110 * Open a file given a file handle. 2111 * 2112 * Check permissions, allocate an open file structure, 2113 * and call the device open routine if any. 
2114 */ 2115 2116 int 2117 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2118 register_t *retval) 2119 { 2120 file_t *fp; 2121 struct vnode *vp = NULL; 2122 kauth_cred_t cred = l->l_cred; 2123 file_t *nfp; 2124 int indx, error; 2125 struct vattr va; 2126 fhandle_t *fh; 2127 int flags; 2128 proc_t *p; 2129 2130 p = curproc; 2131 2132 /* 2133 * Must be super user 2134 */ 2135 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2136 0, NULL, NULL, NULL))) 2137 return (error); 2138 2139 if (oflags & O_SEARCH) { 2140 oflags &= ~(int)O_SEARCH; 2141 } 2142 2143 flags = FFLAGS(oflags); 2144 if ((flags & (FREAD | FWRITE)) == 0) 2145 return (EINVAL); 2146 if ((flags & O_CREAT)) 2147 return (EINVAL); 2148 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2149 return (error); 2150 fp = nfp; 2151 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2152 if (error != 0) { 2153 goto bad; 2154 } 2155 error = vfs_fhtovp(fh, &vp); 2156 vfs_copyinfh_free(fh); 2157 if (error != 0) { 2158 goto bad; 2159 } 2160 2161 /* Now do an effective vn_open */ 2162 2163 if (vp->v_type == VSOCK) { 2164 error = EOPNOTSUPP; 2165 goto bad; 2166 } 2167 error = vn_openchk(vp, cred, flags); 2168 if (error != 0) 2169 goto bad; 2170 if (flags & O_TRUNC) { 2171 VOP_UNLOCK(vp); /* XXX */ 2172 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2173 vattr_null(&va); 2174 va.va_size = 0; 2175 error = VOP_SETATTR(vp, &va, cred); 2176 if (error) 2177 goto bad; 2178 } 2179 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2180 goto bad; 2181 if (flags & FWRITE) { 2182 mutex_enter(vp->v_interlock); 2183 vp->v_writecount++; 2184 mutex_exit(vp->v_interlock); 2185 } 2186 2187 /* done with modified vn_open, now finish what sys_open does. 
*/ 2188 if ((error = open_setfp(l, fp, vp, indx, flags))) 2189 return error; 2190 2191 VOP_UNLOCK(vp); 2192 *retval = indx; 2193 fd_affix(p, fp, indx); 2194 return (0); 2195 2196 bad: 2197 fd_abort(p, fp, indx); 2198 if (vp != NULL) 2199 vput(vp); 2200 if (error == EDUPFD || error == EMOVEFD) { 2201 /* XXX should probably close curlwp->l_dupfd */ 2202 error = EOPNOTSUPP; 2203 } 2204 return (error); 2205 } 2206 2207 int 2208 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2209 { 2210 /* { 2211 syscallarg(const void *) fhp; 2212 syscallarg(size_t) fh_size; 2213 syscallarg(int) flags; 2214 } */ 2215 2216 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2217 SCARG(uap, flags), retval); 2218 } 2219 2220 int 2221 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2222 { 2223 int error; 2224 fhandle_t *fh; 2225 struct vnode *vp; 2226 2227 /* 2228 * Must be super user 2229 */ 2230 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2231 0, NULL, NULL, NULL))) 2232 return (error); 2233 2234 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2235 if (error != 0) 2236 return error; 2237 2238 error = vfs_fhtovp(fh, &vp); 2239 vfs_copyinfh_free(fh); 2240 if (error != 0) 2241 return error; 2242 2243 error = vn_stat(vp, sb); 2244 vput(vp); 2245 return error; 2246 } 2247 2248 2249 /* ARGSUSED */ 2250 int 2251 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2252 { 2253 /* { 2254 syscallarg(const void *) fhp; 2255 syscallarg(size_t) fh_size; 2256 syscallarg(struct stat *) sb; 2257 } */ 2258 struct stat sb; 2259 int error; 2260 2261 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2262 if (error) 2263 return error; 2264 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2265 } 2266 2267 int 2268 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2269 int flags) 2270 { 2271 fhandle_t *fh; 2272 struct mount *mp; 
2273 struct vnode *vp; 2274 int error; 2275 2276 /* 2277 * Must be super user 2278 */ 2279 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2280 0, NULL, NULL, NULL))) 2281 return error; 2282 2283 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2284 if (error != 0) 2285 return error; 2286 2287 error = vfs_fhtovp(fh, &vp); 2288 vfs_copyinfh_free(fh); 2289 if (error != 0) 2290 return error; 2291 2292 mp = vp->v_mount; 2293 error = dostatvfs(mp, sb, l, flags, 1); 2294 vput(vp); 2295 return error; 2296 } 2297 2298 /* ARGSUSED */ 2299 int 2300 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, register_t *retval) 2301 { 2302 /* { 2303 syscallarg(const void *) fhp; 2304 syscallarg(size_t) fh_size; 2305 syscallarg(struct statvfs *) buf; 2306 syscallarg(int) flags; 2307 } */ 2308 struct statvfs *sb = STATVFSBUF_GET(); 2309 int error; 2310 2311 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2312 SCARG(uap, flags)); 2313 if (error == 0) 2314 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2315 STATVFSBUF_PUT(sb); 2316 return error; 2317 } 2318 2319 int 2320 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2321 dev_t dev) 2322 { 2323 2324 /* 2325 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2326 * in mode and dev=0. 2327 * 2328 * In all the other cases it's implementation defined behavior. 2329 */ 2330 2331 if ((mode & S_IFIFO) && dev == 0) 2332 return do_sys_mkfifoat(l, fdat, pathname, mode); 2333 else 2334 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2335 UIO_USERSPACE); 2336 } 2337 2338 /* 2339 * Create a special file. 
2340 */ 2341 /* ARGSUSED */ 2342 int 2343 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2344 register_t *retval) 2345 { 2346 /* { 2347 syscallarg(const char *) path; 2348 syscallarg(mode_t) mode; 2349 syscallarg(dev_t) dev; 2350 } */ 2351 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2352 SCARG(uap, mode), SCARG(uap, dev)); 2353 } 2354 2355 int 2356 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2357 register_t *retval) 2358 { 2359 /* { 2360 syscallarg(int) fd; 2361 syscallarg(const char *) path; 2362 syscallarg(mode_t) mode; 2363 syscallarg(int) pad; 2364 syscallarg(dev_t) dev; 2365 } */ 2366 2367 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2368 SCARG(uap, mode), SCARG(uap, dev)); 2369 } 2370 2371 int 2372 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2373 enum uio_seg seg) 2374 { 2375 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2376 } 2377 2378 int 2379 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2380 dev_t dev, enum uio_seg seg) 2381 { 2382 struct proc *p = l->l_proc; 2383 struct vnode *vp; 2384 struct vattr vattr; 2385 int error, optype; 2386 struct pathbuf *pb; 2387 struct nameidata nd; 2388 const char *pathstring; 2389 2390 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2391 0, NULL, NULL, NULL)) != 0) 2392 return (error); 2393 2394 optype = VOP_MKNOD_DESCOFFSET; 2395 2396 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2397 if (error) { 2398 return error; 2399 } 2400 pathstring = pathbuf_stringcopy_get(pb); 2401 if (pathstring == NULL) { 2402 pathbuf_destroy(pb); 2403 return ENOMEM; 2404 } 2405 2406 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2407 2408 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2409 goto out; 2410 vp = nd.ni_vp; 2411 2412 if (vp != NULL) 2413 error = EEXIST; 2414 else { 2415 vattr_null(&vattr); 2416 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2417 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2418 vattr.va_rdev = dev; 2419 2420 switch (mode & S_IFMT) { 2421 case S_IFMT: /* used by badsect to flag bad sectors */ 2422 vattr.va_type = VBAD; 2423 break; 2424 case S_IFCHR: 2425 vattr.va_type = VCHR; 2426 break; 2427 case S_IFBLK: 2428 vattr.va_type = VBLK; 2429 break; 2430 case S_IFWHT: 2431 optype = VOP_WHITEOUT_DESCOFFSET; 2432 break; 2433 case S_IFREG: 2434 #if NVERIEXEC > 0 2435 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2436 O_CREAT); 2437 #endif /* NVERIEXEC > 0 */ 2438 vattr.va_type = VREG; 2439 vattr.va_rdev = VNOVAL; 2440 optype = VOP_CREATE_DESCOFFSET; 2441 break; 2442 default: 2443 error = EINVAL; 2444 break; 2445 } 2446 2447 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2448 vattr.va_rdev == VNOVAL) 2449 error = EINVAL; 2450 } 2451 2452 if (!error) { 2453 switch (optype) { 2454 case VOP_WHITEOUT_DESCOFFSET: 2455 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2456 if (error) 2457 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2458 vput(nd.ni_dvp); 2459 break; 2460 2461 case VOP_MKNOD_DESCOFFSET: 2462 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2463 &nd.ni_cnd, &vattr); 2464 if (error == 0) 2465 vrele(nd.ni_vp); 2466 vput(nd.ni_dvp); 2467 break; 2468 2469 case VOP_CREATE_DESCOFFSET: 2470 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2471 &nd.ni_cnd, &vattr); 2472 if (error == 0) 2473 vrele(nd.ni_vp); 2474 vput(nd.ni_dvp); 2475 break; 2476 } 2477 } else { 2478 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2479 if (nd.ni_dvp == vp) 2480 vrele(nd.ni_dvp); 2481 else 2482 vput(nd.ni_dvp); 2483 if (vp) 2484 vrele(vp); 2485 } 2486 out: 2487 pathbuf_stringcopy_put(pb, pathstring); 2488 pathbuf_destroy(pb); 2489 return (error); 2490 } 2491 2492 /* 2493 * Create a named pipe. 
2494 */ 2495 /* ARGSUSED */ 2496 int 2497 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2498 { 2499 /* { 2500 syscallarg(const char *) path; 2501 syscallarg(int) mode; 2502 } */ 2503 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2504 } 2505 2506 int 2507 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2508 register_t *retval) 2509 { 2510 /* { 2511 syscallarg(int) fd; 2512 syscallarg(const char *) path; 2513 syscallarg(int) mode; 2514 } */ 2515 2516 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2517 SCARG(uap, mode)); 2518 } 2519 2520 static int 2521 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2522 { 2523 struct proc *p = l->l_proc; 2524 struct vattr vattr; 2525 int error; 2526 struct pathbuf *pb; 2527 struct nameidata nd; 2528 2529 error = pathbuf_copyin(path, &pb); 2530 if (error) { 2531 return error; 2532 } 2533 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2534 2535 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2536 pathbuf_destroy(pb); 2537 return error; 2538 } 2539 if (nd.ni_vp != NULL) { 2540 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2541 if (nd.ni_dvp == nd.ni_vp) 2542 vrele(nd.ni_dvp); 2543 else 2544 vput(nd.ni_dvp); 2545 vrele(nd.ni_vp); 2546 pathbuf_destroy(pb); 2547 return (EEXIST); 2548 } 2549 vattr_null(&vattr); 2550 vattr.va_type = VFIFO; 2551 /* We will read cwdi->cwdi_cmask unlocked. */ 2552 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2553 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2554 if (error == 0) 2555 vrele(nd.ni_vp); 2556 vput(nd.ni_dvp); 2557 pathbuf_destroy(pb); 2558 return (error); 2559 } 2560 2561 /* 2562 * Make a hard file link. 
2563 */ 2564 /* ARGSUSED */ 2565 int 2566 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2567 const char *link, int follow, register_t *retval) 2568 { 2569 struct vnode *vp; 2570 struct pathbuf *linkpb; 2571 struct nameidata nd; 2572 namei_simple_flags_t ns_flags; 2573 int error; 2574 2575 if (follow & AT_SYMLINK_FOLLOW) 2576 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2577 else 2578 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2579 2580 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2581 if (error != 0) 2582 return (error); 2583 error = pathbuf_copyin(link, &linkpb); 2584 if (error) { 2585 goto out1; 2586 } 2587 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2588 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2589 goto out2; 2590 if (nd.ni_vp) { 2591 error = EEXIST; 2592 goto abortop; 2593 } 2594 /* Prevent hard links on directories. */ 2595 if (vp->v_type == VDIR) { 2596 error = EPERM; 2597 goto abortop; 2598 } 2599 /* Prevent cross-mount operation. */ 2600 if (nd.ni_dvp->v_mount != vp->v_mount) { 2601 error = EXDEV; 2602 goto abortop; 2603 } 2604 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2605 VOP_UNLOCK(nd.ni_dvp); 2606 vrele(nd.ni_dvp); 2607 out2: 2608 pathbuf_destroy(linkpb); 2609 out1: 2610 vrele(vp); 2611 return (error); 2612 abortop: 2613 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2614 if (nd.ni_dvp == nd.ni_vp) 2615 vrele(nd.ni_dvp); 2616 else 2617 vput(nd.ni_dvp); 2618 if (nd.ni_vp != NULL) 2619 vrele(nd.ni_vp); 2620 goto out2; 2621 } 2622 2623 int 2624 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2625 { 2626 /* { 2627 syscallarg(const char *) path; 2628 syscallarg(const char *) link; 2629 } */ 2630 const char *path = SCARG(uap, path); 2631 const char *link = SCARG(uap, link); 2632 2633 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2634 AT_SYMLINK_FOLLOW, retval); 2635 } 2636 2637 int 2638 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2639 register_t *retval) 2640 { 2641 
/* { 2642 syscallarg(int) fd1; 2643 syscallarg(const char *) name1; 2644 syscallarg(int) fd2; 2645 syscallarg(const char *) name2; 2646 syscallarg(int) flags; 2647 } */ 2648 int fd1 = SCARG(uap, fd1); 2649 const char *name1 = SCARG(uap, name1); 2650 int fd2 = SCARG(uap, fd2); 2651 const char *name2 = SCARG(uap, name2); 2652 int follow; 2653 2654 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2655 2656 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2657 } 2658 2659 2660 int 2661 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2662 { 2663 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2664 } 2665 2666 static int 2667 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2668 const char *link, enum uio_seg seg) 2669 { 2670 struct proc *p = curproc; 2671 struct vattr vattr; 2672 char *path; 2673 int error; 2674 size_t len; 2675 struct pathbuf *linkpb; 2676 struct nameidata nd; 2677 2678 KASSERT(l != NULL || fdat == AT_FDCWD); 2679 2680 path = PNBUF_GET(); 2681 if (seg == UIO_USERSPACE) { 2682 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2683 goto out1; 2684 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2685 goto out1; 2686 } else { 2687 len = strlen(patharg) + 1; 2688 KASSERT(len <= MAXPATHLEN); 2689 memcpy(path, patharg, len); 2690 linkpb = pathbuf_create(link); 2691 if (linkpb == NULL) { 2692 error = ENOMEM; 2693 goto out1; 2694 } 2695 } 2696 ktrkuser("symlink-target", path, len - 1); 2697 2698 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2699 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2700 goto out2; 2701 if (nd.ni_vp) { 2702 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2703 if (nd.ni_dvp == nd.ni_vp) 2704 vrele(nd.ni_dvp); 2705 else 2706 vput(nd.ni_dvp); 2707 vrele(nd.ni_vp); 2708 error = EEXIST; 2709 goto out2; 2710 } 2711 vattr_null(&vattr); 2712 vattr.va_type = VLNK; 2713 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2714 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2715 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2716 if (error == 0) 2717 vrele(nd.ni_vp); 2718 vput(nd.ni_dvp); 2719 out2: 2720 pathbuf_destroy(linkpb); 2721 out1: 2722 PNBUF_PUT(path); 2723 return (error); 2724 } 2725 2726 /* 2727 * Make a symbolic link. 2728 */ 2729 /* ARGSUSED */ 2730 int 2731 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2732 { 2733 /* { 2734 syscallarg(const char *) path; 2735 syscallarg(const char *) link; 2736 } */ 2737 2738 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2739 UIO_USERSPACE); 2740 } 2741 2742 int 2743 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2744 register_t *retval) 2745 { 2746 /* { 2747 syscallarg(const char *) path1; 2748 syscallarg(int) fd; 2749 syscallarg(const char *) path2; 2750 } */ 2751 2752 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2753 SCARG(uap, path2), UIO_USERSPACE); 2754 } 2755 2756 /* 2757 * Delete a whiteout from the filesystem. 
2758 */ 2759 /* ARGSUSED */ 2760 int 2761 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2762 { 2763 /* { 2764 syscallarg(const char *) path; 2765 } */ 2766 int error; 2767 struct pathbuf *pb; 2768 struct nameidata nd; 2769 2770 error = pathbuf_copyin(SCARG(uap, path), &pb); 2771 if (error) { 2772 return error; 2773 } 2774 2775 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2776 error = namei(&nd); 2777 if (error) { 2778 pathbuf_destroy(pb); 2779 return (error); 2780 } 2781 2782 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2783 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2784 if (nd.ni_dvp == nd.ni_vp) 2785 vrele(nd.ni_dvp); 2786 else 2787 vput(nd.ni_dvp); 2788 if (nd.ni_vp) 2789 vrele(nd.ni_vp); 2790 pathbuf_destroy(pb); 2791 return (EEXIST); 2792 } 2793 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2794 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2795 vput(nd.ni_dvp); 2796 pathbuf_destroy(pb); 2797 return (error); 2798 } 2799 2800 /* 2801 * Delete a name from the filesystem. 
2802 */ 2803 /* ARGSUSED */ 2804 int 2805 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2806 { 2807 /* { 2808 syscallarg(const char *) path; 2809 } */ 2810 2811 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2812 } 2813 2814 int 2815 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2816 register_t *retval) 2817 { 2818 /* { 2819 syscallarg(int) fd; 2820 syscallarg(const char *) path; 2821 syscallarg(int) flag; 2822 } */ 2823 2824 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2825 SCARG(uap, flag), UIO_USERSPACE); 2826 } 2827 2828 int 2829 do_sys_unlink(const char *arg, enum uio_seg seg) 2830 { 2831 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2832 } 2833 2834 static int 2835 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2836 enum uio_seg seg) 2837 { 2838 struct vnode *vp; 2839 int error; 2840 struct pathbuf *pb; 2841 struct nameidata nd; 2842 const char *pathstring; 2843 2844 KASSERT(l != NULL || fdat == AT_FDCWD); 2845 2846 error = pathbuf_maybe_copyin(arg, seg, &pb); 2847 if (error) { 2848 return error; 2849 } 2850 pathstring = pathbuf_stringcopy_get(pb); 2851 if (pathstring == NULL) { 2852 pathbuf_destroy(pb); 2853 return ENOMEM; 2854 } 2855 2856 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2857 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2858 goto out; 2859 vp = nd.ni_vp; 2860 2861 /* 2862 * The root of a mounted filesystem cannot be deleted. 2863 */ 2864 if ((vp->v_vflag & VV_ROOT) != 0) { 2865 error = EBUSY; 2866 goto abort; 2867 } 2868 2869 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2870 error = EBUSY; 2871 goto abort; 2872 } 2873 2874 /* 2875 * No rmdir "." please. 
2876 */ 2877 if (nd.ni_dvp == vp) { 2878 error = EINVAL; 2879 goto abort; 2880 } 2881 2882 /* 2883 * AT_REMOVEDIR is required to remove a directory 2884 */ 2885 if (vp->v_type == VDIR) { 2886 if (!(flags & AT_REMOVEDIR)) { 2887 error = EPERM; 2888 goto abort; 2889 } else { 2890 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2891 vput(nd.ni_dvp); 2892 goto out; 2893 } 2894 } 2895 2896 /* 2897 * Starting here we only deal with non directories. 2898 */ 2899 if (flags & AT_REMOVEDIR) { 2900 error = ENOTDIR; 2901 goto abort; 2902 } 2903 2904 #if NVERIEXEC > 0 2905 /* Handle remove requests for veriexec entries. */ 2906 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2907 goto abort; 2908 } 2909 #endif /* NVERIEXEC > 0 */ 2910 2911 #ifdef FILEASSOC 2912 (void)fileassoc_file_delete(vp); 2913 #endif /* FILEASSOC */ 2914 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2915 vput(nd.ni_dvp); 2916 goto out; 2917 2918 abort: 2919 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2920 if (nd.ni_dvp == vp) 2921 vrele(nd.ni_dvp); 2922 else 2923 vput(nd.ni_dvp); 2924 vput(vp); 2925 2926 out: 2927 pathbuf_stringcopy_put(pb, pathstring); 2928 pathbuf_destroy(pb); 2929 return (error); 2930 } 2931 2932 /* 2933 * Reposition read/write file offset. 
2934 */ 2935 int 2936 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2937 { 2938 /* { 2939 syscallarg(int) fd; 2940 syscallarg(int) pad; 2941 syscallarg(off_t) offset; 2942 syscallarg(int) whence; 2943 } */ 2944 file_t *fp; 2945 int error, fd; 2946 2947 switch (SCARG(uap, whence)) { 2948 case SEEK_CUR: 2949 case SEEK_END: 2950 case SEEK_SET: 2951 break; 2952 default: 2953 return EINVAL; 2954 } 2955 2956 fd = SCARG(uap, fd); 2957 2958 if ((fp = fd_getfile(fd)) == NULL) 2959 return (EBADF); 2960 2961 if (fp->f_ops->fo_seek == NULL) { 2962 error = ESPIPE; 2963 goto out; 2964 } 2965 2966 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 2967 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 2968 out: 2969 fd_putfile(fd); 2970 return (error); 2971 } 2972 2973 /* 2974 * Positional read system call. 2975 */ 2976 int 2977 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2978 { 2979 /* { 2980 syscallarg(int) fd; 2981 syscallarg(void *) buf; 2982 syscallarg(size_t) nbyte; 2983 syscallarg(off_t) offset; 2984 } */ 2985 file_t *fp; 2986 off_t offset; 2987 int error, fd = SCARG(uap, fd); 2988 2989 if ((fp = fd_getfile(fd)) == NULL) 2990 return (EBADF); 2991 2992 if ((fp->f_flag & FREAD) == 0) { 2993 fd_putfile(fd); 2994 return (EBADF); 2995 } 2996 2997 if (fp->f_ops->fo_seek == NULL) { 2998 error = ESPIPE; 2999 goto out; 3000 } 3001 3002 offset = SCARG(uap, offset); 3003 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3004 if (error) 3005 goto out; 3006 3007 /* dofileread() will unuse the descriptor for us */ 3008 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3009 &offset, 0, retval)); 3010 3011 out: 3012 fd_putfile(fd); 3013 return (error); 3014 } 3015 3016 /* 3017 * Positional scatter read system call. 
3018 */ 3019 int 3020 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 3021 { 3022 /* { 3023 syscallarg(int) fd; 3024 syscallarg(const struct iovec *) iovp; 3025 syscallarg(int) iovcnt; 3026 syscallarg(off_t) offset; 3027 } */ 3028 off_t offset = SCARG(uap, offset); 3029 3030 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3031 SCARG(uap, iovcnt), &offset, 0, retval); 3032 } 3033 3034 /* 3035 * Positional write system call. 3036 */ 3037 int 3038 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 3039 { 3040 /* { 3041 syscallarg(int) fd; 3042 syscallarg(const void *) buf; 3043 syscallarg(size_t) nbyte; 3044 syscallarg(off_t) offset; 3045 } */ 3046 file_t *fp; 3047 off_t offset; 3048 int error, fd = SCARG(uap, fd); 3049 3050 if ((fp = fd_getfile(fd)) == NULL) 3051 return (EBADF); 3052 3053 if ((fp->f_flag & FWRITE) == 0) { 3054 fd_putfile(fd); 3055 return (EBADF); 3056 } 3057 3058 if (fp->f_ops->fo_seek == NULL) { 3059 error = ESPIPE; 3060 goto out; 3061 } 3062 3063 offset = SCARG(uap, offset); 3064 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3065 if (error) 3066 goto out; 3067 3068 /* dofilewrite() will unuse the descriptor for us */ 3069 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3070 &offset, 0, retval)); 3071 3072 out: 3073 fd_putfile(fd); 3074 return (error); 3075 } 3076 3077 /* 3078 * Positional gather write system call. 3079 */ 3080 int 3081 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 3082 { 3083 /* { 3084 syscallarg(int) fd; 3085 syscallarg(const struct iovec *) iovp; 3086 syscallarg(int) iovcnt; 3087 syscallarg(off_t) offset; 3088 } */ 3089 off_t offset = SCARG(uap, offset); 3090 3091 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3092 SCARG(uap, iovcnt), &offset, 0, retval); 3093 } 3094 3095 /* 3096 * Check access permissions. 
3097 */ 3098 int 3099 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 3100 { 3101 /* { 3102 syscallarg(const char *) path; 3103 syscallarg(int) flags; 3104 } */ 3105 3106 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3107 SCARG(uap, flags), 0); 3108 } 3109 3110 int 3111 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3112 int mode, int flags) 3113 { 3114 kauth_cred_t cred; 3115 struct vnode *vp; 3116 int error, nd_flag, vmode; 3117 struct pathbuf *pb; 3118 struct nameidata nd; 3119 3120 CTASSERT(F_OK == 0); 3121 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3122 /* nonsense mode */ 3123 return EINVAL; 3124 } 3125 3126 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3127 if (flags & AT_SYMLINK_NOFOLLOW) 3128 nd_flag &= ~FOLLOW; 3129 3130 error = pathbuf_copyin(path, &pb); 3131 if (error) 3132 return error; 3133 3134 NDINIT(&nd, LOOKUP, nd_flag, pb); 3135 3136 /* Override default credentials */ 3137 cred = kauth_cred_dup(l->l_cred); 3138 if (!(flags & AT_EACCESS)) { 3139 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3140 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3141 } 3142 nd.ni_cnd.cn_cred = cred; 3143 3144 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3145 pathbuf_destroy(pb); 3146 goto out; 3147 } 3148 vp = nd.ni_vp; 3149 pathbuf_destroy(pb); 3150 3151 /* Flags == 0 means only check for existence. 
*/ 3152 if (mode) { 3153 vmode = 0; 3154 if (mode & R_OK) 3155 vmode |= VREAD; 3156 if (mode & W_OK) 3157 vmode |= VWRITE; 3158 if (mode & X_OK) 3159 vmode |= VEXEC; 3160 3161 error = VOP_ACCESS(vp, vmode, cred); 3162 if (!error && (vmode & VWRITE)) 3163 error = vn_writechk(vp); 3164 } 3165 vput(vp); 3166 out: 3167 kauth_cred_free(cred); 3168 return (error); 3169 } 3170 3171 int 3172 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3173 register_t *retval) 3174 { 3175 /* { 3176 syscallarg(int) fd; 3177 syscallarg(const char *) path; 3178 syscallarg(int) amode; 3179 syscallarg(int) flag; 3180 } */ 3181 3182 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3183 SCARG(uap, amode), SCARG(uap, flag)); 3184 } 3185 3186 /* 3187 * Common code for all sys_stat functions, including compat versions. 3188 */ 3189 int 3190 do_sys_stat(const char *userpath, unsigned int nd_flag, 3191 struct stat *sb) 3192 { 3193 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3194 } 3195 3196 int 3197 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3198 unsigned int nd_flag, struct stat *sb) 3199 { 3200 int error; 3201 struct pathbuf *pb; 3202 struct nameidata nd; 3203 3204 KASSERT(l != NULL || fdat == AT_FDCWD); 3205 3206 error = pathbuf_copyin(userpath, &pb); 3207 if (error) { 3208 return error; 3209 } 3210 3211 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3212 3213 error = fd_nameiat(l, fdat, &nd); 3214 if (error != 0) { 3215 pathbuf_destroy(pb); 3216 return error; 3217 } 3218 error = vn_stat(nd.ni_vp, sb); 3219 vput(nd.ni_vp); 3220 pathbuf_destroy(pb); 3221 return error; 3222 } 3223 3224 /* 3225 * Get file status; this version follows links. 
3226 */ 3227 /* ARGSUSED */ 3228 int 3229 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3230 { 3231 /* { 3232 syscallarg(const char *) path; 3233 syscallarg(struct stat *) ub; 3234 } */ 3235 struct stat sb; 3236 int error; 3237 3238 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3239 if (error) 3240 return error; 3241 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3242 } 3243 3244 /* 3245 * Get file status; this version does not follow links. 3246 */ 3247 /* ARGSUSED */ 3248 int 3249 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3250 { 3251 /* { 3252 syscallarg(const char *) path; 3253 syscallarg(struct stat *) ub; 3254 } */ 3255 struct stat sb; 3256 int error; 3257 3258 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3259 if (error) 3260 return error; 3261 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3262 } 3263 3264 int 3265 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3266 register_t *retval) 3267 { 3268 /* { 3269 syscallarg(int) fd; 3270 syscallarg(const char *) path; 3271 syscallarg(struct stat *) buf; 3272 syscallarg(int) flag; 3273 } */ 3274 unsigned int nd_flag; 3275 struct stat sb; 3276 int error; 3277 3278 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3279 nd_flag = NOFOLLOW; 3280 else 3281 nd_flag = FOLLOW; 3282 3283 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3284 &sb); 3285 if (error) 3286 return error; 3287 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3288 } 3289 3290 static int 3291 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3292 { 3293 int error; 3294 struct pathbuf *pb; 3295 struct nameidata nd; 3296 3297 error = pathbuf_copyin(path, &pb); 3298 if (error) { 3299 return error; 3300 } 3301 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3302 if ((error = namei(&nd)) != 0) { 3303 pathbuf_destroy(pb); 3304 return error; 3305 } 3306 error = 
VOP_PATHCONF(nd.ni_vp, name, retval); 3307 vput(nd.ni_vp); 3308 pathbuf_destroy(pb); 3309 return error; 3310 } 3311 3312 /* 3313 * Get configurable pathname variables. 3314 */ 3315 /* ARGSUSED */ 3316 int 3317 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3318 register_t *retval) 3319 { 3320 /* { 3321 syscallarg(const char *) path; 3322 syscallarg(int) name; 3323 } */ 3324 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3325 FOLLOW); 3326 } 3327 3328 /* ARGSUSED */ 3329 int 3330 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3331 register_t *retval) 3332 { 3333 /* { 3334 syscallarg(const char *) path; 3335 syscallarg(int) name; 3336 } */ 3337 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3338 NOFOLLOW); 3339 } 3340 3341 /* 3342 * Return target name of a symbolic link. 3343 */ 3344 /* ARGSUSED */ 3345 int 3346 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3347 register_t *retval) 3348 { 3349 /* { 3350 syscallarg(const char *) path; 3351 syscallarg(char *) buf; 3352 syscallarg(size_t) count; 3353 } */ 3354 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3355 SCARG(uap, buf), SCARG(uap, count), retval); 3356 } 3357 3358 static int 3359 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3360 size_t count, register_t *retval) 3361 { 3362 struct vnode *vp; 3363 struct iovec aiov; 3364 struct uio auio; 3365 int error; 3366 struct pathbuf *pb; 3367 struct nameidata nd; 3368 3369 error = pathbuf_copyin(path, &pb); 3370 if (error) { 3371 return error; 3372 } 3373 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 3374 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3375 pathbuf_destroy(pb); 3376 return error; 3377 } 3378 vp = nd.ni_vp; 3379 pathbuf_destroy(pb); 3380 if (vp->v_type != VLNK) 3381 error = EINVAL; 3382 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3383 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3384 
aiov.iov_base = buf; 3385 aiov.iov_len = count; 3386 auio.uio_iov = &aiov; 3387 auio.uio_iovcnt = 1; 3388 auio.uio_offset = 0; 3389 auio.uio_rw = UIO_READ; 3390 KASSERT(l == curlwp); 3391 auio.uio_vmspace = l->l_proc->p_vmspace; 3392 auio.uio_resid = count; 3393 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3394 *retval = count - auio.uio_resid; 3395 } 3396 vput(vp); 3397 return (error); 3398 } 3399 3400 int 3401 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3402 register_t *retval) 3403 { 3404 /* { 3405 syscallarg(int) fd; 3406 syscallarg(const char *) path; 3407 syscallarg(char *) buf; 3408 syscallarg(size_t) bufsize; 3409 } */ 3410 3411 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3412 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3413 } 3414 3415 /* 3416 * Change flags of a file given a path name. 3417 */ 3418 /* ARGSUSED */ 3419 int 3420 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3421 { 3422 /* { 3423 syscallarg(const char *) path; 3424 syscallarg(u_long) flags; 3425 } */ 3426 struct vnode *vp; 3427 int error; 3428 3429 error = namei_simple_user(SCARG(uap, path), 3430 NSM_FOLLOW_TRYEMULROOT, &vp); 3431 if (error != 0) 3432 return (error); 3433 error = change_flags(vp, SCARG(uap, flags), l); 3434 vput(vp); 3435 return (error); 3436 } 3437 3438 /* 3439 * Change flags of a file given a file descriptor. 
3440 */ 3441 /* ARGSUSED */ 3442 int 3443 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3444 { 3445 /* { 3446 syscallarg(int) fd; 3447 syscallarg(u_long) flags; 3448 } */ 3449 struct vnode *vp; 3450 file_t *fp; 3451 int error; 3452 3453 /* fd_getvnode() will use the descriptor for us */ 3454 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3455 return (error); 3456 vp = fp->f_vnode; 3457 error = change_flags(vp, SCARG(uap, flags), l); 3458 VOP_UNLOCK(vp); 3459 fd_putfile(SCARG(uap, fd)); 3460 return (error); 3461 } 3462 3463 /* 3464 * Change flags of a file given a path name; this version does 3465 * not follow links. 3466 */ 3467 int 3468 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3469 { 3470 /* { 3471 syscallarg(const char *) path; 3472 syscallarg(u_long) flags; 3473 } */ 3474 struct vnode *vp; 3475 int error; 3476 3477 error = namei_simple_user(SCARG(uap, path), 3478 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3479 if (error != 0) 3480 return (error); 3481 error = change_flags(vp, SCARG(uap, flags), l); 3482 vput(vp); 3483 return (error); 3484 } 3485 3486 /* 3487 * Common routine to change flags of a file. 3488 */ 3489 int 3490 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3491 { 3492 struct vattr vattr; 3493 int error; 3494 3495 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3496 3497 vattr_null(&vattr); 3498 vattr.va_flags = flags; 3499 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3500 3501 return (error); 3502 } 3503 3504 /* 3505 * Change mode of a file given path name; this version follows links. 
3506 */ 3507 /* ARGSUSED */ 3508 int 3509 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3510 { 3511 /* { 3512 syscallarg(const char *) path; 3513 syscallarg(int) mode; 3514 } */ 3515 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3516 SCARG(uap, mode), 0); 3517 } 3518 3519 int 3520 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3521 { 3522 int error; 3523 struct vnode *vp; 3524 namei_simple_flags_t ns_flag; 3525 3526 if (flags & AT_SYMLINK_NOFOLLOW) 3527 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3528 else 3529 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3530 3531 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3532 if (error != 0) 3533 return error; 3534 3535 error = change_mode(vp, mode, l); 3536 3537 vrele(vp); 3538 3539 return (error); 3540 } 3541 3542 /* 3543 * Change mode of a file given a file descriptor. 3544 */ 3545 /* ARGSUSED */ 3546 int 3547 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3548 { 3549 /* { 3550 syscallarg(int) fd; 3551 syscallarg(int) mode; 3552 } */ 3553 file_t *fp; 3554 int error; 3555 3556 /* fd_getvnode() will use the descriptor for us */ 3557 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3558 return (error); 3559 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3560 fd_putfile(SCARG(uap, fd)); 3561 return (error); 3562 } 3563 3564 int 3565 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3566 register_t *retval) 3567 { 3568 /* { 3569 syscallarg(int) fd; 3570 syscallarg(const char *) path; 3571 syscallarg(int) mode; 3572 syscallarg(int) flag; 3573 } */ 3574 3575 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3576 SCARG(uap, mode), SCARG(uap, flag)); 3577 } 3578 3579 /* 3580 * Change mode of a file given path name; this version does not follow links. 
3581 */ 3582 /* ARGSUSED */ 3583 int 3584 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3585 { 3586 /* { 3587 syscallarg(const char *) path; 3588 syscallarg(int) mode; 3589 } */ 3590 int error; 3591 struct vnode *vp; 3592 3593 error = namei_simple_user(SCARG(uap, path), 3594 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3595 if (error != 0) 3596 return (error); 3597 3598 error = change_mode(vp, SCARG(uap, mode), l); 3599 3600 vrele(vp); 3601 return (error); 3602 } 3603 3604 /* 3605 * Common routine to set mode given a vnode. 3606 */ 3607 static int 3608 change_mode(struct vnode *vp, int mode, struct lwp *l) 3609 { 3610 struct vattr vattr; 3611 int error; 3612 3613 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3614 vattr_null(&vattr); 3615 vattr.va_mode = mode & ALLPERMS; 3616 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3617 VOP_UNLOCK(vp); 3618 return (error); 3619 } 3620 3621 /* 3622 * Set ownership given a path name; this version follows links. 3623 */ 3624 /* ARGSUSED */ 3625 int 3626 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3627 { 3628 /* { 3629 syscallarg(const char *) path; 3630 syscallarg(uid_t) uid; 3631 syscallarg(gid_t) gid; 3632 } */ 3633 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3634 SCARG(uap, gid), 0); 3635 } 3636 3637 int 3638 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3639 gid_t gid, int flags) 3640 { 3641 int error; 3642 struct vnode *vp; 3643 namei_simple_flags_t ns_flag; 3644 3645 if (flags & AT_SYMLINK_NOFOLLOW) 3646 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3647 else 3648 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3649 3650 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3651 if (error != 0) 3652 return error; 3653 3654 error = change_owner(vp, uid, gid, l, 0); 3655 3656 vrele(vp); 3657 3658 return (error); 3659 } 3660 3661 /* 3662 * Set ownership given a path name; this version follows links. 3663 * Provides POSIX semantics. 
3664 */ 3665 /* ARGSUSED */ 3666 int 3667 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3668 { 3669 /* { 3670 syscallarg(const char *) path; 3671 syscallarg(uid_t) uid; 3672 syscallarg(gid_t) gid; 3673 } */ 3674 int error; 3675 struct vnode *vp; 3676 3677 error = namei_simple_user(SCARG(uap, path), 3678 NSM_FOLLOW_TRYEMULROOT, &vp); 3679 if (error != 0) 3680 return (error); 3681 3682 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3683 3684 vrele(vp); 3685 return (error); 3686 } 3687 3688 /* 3689 * Set ownership given a file descriptor. 3690 */ 3691 /* ARGSUSED */ 3692 int 3693 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3694 { 3695 /* { 3696 syscallarg(int) fd; 3697 syscallarg(uid_t) uid; 3698 syscallarg(gid_t) gid; 3699 } */ 3700 int error; 3701 file_t *fp; 3702 3703 /* fd_getvnode() will use the descriptor for us */ 3704 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3705 return (error); 3706 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3707 l, 0); 3708 fd_putfile(SCARG(uap, fd)); 3709 return (error); 3710 } 3711 3712 int 3713 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3714 register_t *retval) 3715 { 3716 /* { 3717 syscallarg(int) fd; 3718 syscallarg(const char *) path; 3719 syscallarg(uid_t) owner; 3720 syscallarg(gid_t) group; 3721 syscallarg(int) flag; 3722 } */ 3723 3724 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3725 SCARG(uap, owner), SCARG(uap, group), 3726 SCARG(uap, flag)); 3727 } 3728 3729 /* 3730 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 
3731 */ 3732 /* ARGSUSED */ 3733 int 3734 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3735 { 3736 /* { 3737 syscallarg(int) fd; 3738 syscallarg(uid_t) uid; 3739 syscallarg(gid_t) gid; 3740 } */ 3741 int error; 3742 file_t *fp; 3743 3744 /* fd_getvnode() will use the descriptor for us */ 3745 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3746 return (error); 3747 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3748 l, 1); 3749 fd_putfile(SCARG(uap, fd)); 3750 return (error); 3751 } 3752 3753 /* 3754 * Set ownership given a path name; this version does not follow links. 3755 */ 3756 /* ARGSUSED */ 3757 int 3758 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3759 { 3760 /* { 3761 syscallarg(const char *) path; 3762 syscallarg(uid_t) uid; 3763 syscallarg(gid_t) gid; 3764 } */ 3765 int error; 3766 struct vnode *vp; 3767 3768 error = namei_simple_user(SCARG(uap, path), 3769 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3770 if (error != 0) 3771 return (error); 3772 3773 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3774 3775 vrele(vp); 3776 return (error); 3777 } 3778 3779 /* 3780 * Set ownership given a path name; this version does not follow links. 3781 * Provides POSIX/XPG semantics. 3782 */ 3783 /* ARGSUSED */ 3784 int 3785 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3786 { 3787 /* { 3788 syscallarg(const char *) path; 3789 syscallarg(uid_t) uid; 3790 syscallarg(gid_t) gid; 3791 } */ 3792 int error; 3793 struct vnode *vp; 3794 3795 error = namei_simple_user(SCARG(uap, path), 3796 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3797 if (error != 0) 3798 return (error); 3799 3800 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3801 3802 vrele(vp); 3803 return (error); 3804 } 3805 3806 /* 3807 * Common routine to set ownership given a vnode. 
3808 */ 3809 static int 3810 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3811 int posix_semantics) 3812 { 3813 struct vattr vattr; 3814 mode_t newmode; 3815 int error; 3816 3817 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3818 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3819 goto out; 3820 3821 #define CHANGED(x) ((int)(x) != -1) 3822 newmode = vattr.va_mode; 3823 if (posix_semantics) { 3824 /* 3825 * POSIX/XPG semantics: if the caller is not the super-user, 3826 * clear set-user-id and set-group-id bits. Both POSIX and 3827 * the XPG consider the behaviour for calls by the super-user 3828 * implementation-defined; we leave the set-user-id and set- 3829 * group-id settings intact in that case. 3830 */ 3831 if (vattr.va_mode & S_ISUID) { 3832 if (kauth_authorize_vnode(l->l_cred, 3833 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3834 newmode &= ~S_ISUID; 3835 } 3836 if (vattr.va_mode & S_ISGID) { 3837 if (kauth_authorize_vnode(l->l_cred, 3838 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3839 newmode &= ~S_ISGID; 3840 } 3841 } else { 3842 /* 3843 * NetBSD semantics: when changing owner and/or group, 3844 * clear the respective bit(s). 3845 */ 3846 if (CHANGED(uid)) 3847 newmode &= ~S_ISUID; 3848 if (CHANGED(gid)) 3849 newmode &= ~S_ISGID; 3850 } 3851 /* Update va_mode iff altered. */ 3852 if (vattr.va_mode == newmode) 3853 newmode = VNOVAL; 3854 3855 vattr_null(&vattr); 3856 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3857 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3858 vattr.va_mode = newmode; 3859 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3860 #undef CHANGED 3861 3862 out: 3863 VOP_UNLOCK(vp); 3864 return (error); 3865 } 3866 3867 /* 3868 * Set the access and modification times given a path name; this 3869 * version follows links. 
3870 */ 3871 /* ARGSUSED */ 3872 int 3873 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3874 register_t *retval) 3875 { 3876 /* { 3877 syscallarg(const char *) path; 3878 syscallarg(const struct timeval *) tptr; 3879 } */ 3880 3881 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3882 SCARG(uap, tptr), UIO_USERSPACE); 3883 } 3884 3885 /* 3886 * Set the access and modification times given a file descriptor. 3887 */ 3888 /* ARGSUSED */ 3889 int 3890 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3891 register_t *retval) 3892 { 3893 /* { 3894 syscallarg(int) fd; 3895 syscallarg(const struct timeval *) tptr; 3896 } */ 3897 int error; 3898 file_t *fp; 3899 3900 /* fd_getvnode() will use the descriptor for us */ 3901 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3902 return (error); 3903 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3904 UIO_USERSPACE); 3905 fd_putfile(SCARG(uap, fd)); 3906 return (error); 3907 } 3908 3909 int 3910 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3911 register_t *retval) 3912 { 3913 /* { 3914 syscallarg(int) fd; 3915 syscallarg(const struct timespec *) tptr; 3916 } */ 3917 int error; 3918 file_t *fp; 3919 3920 /* fd_getvnode() will use the descriptor for us */ 3921 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3922 return (error); 3923 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3924 SCARG(uap, tptr), UIO_USERSPACE); 3925 fd_putfile(SCARG(uap, fd)); 3926 return (error); 3927 } 3928 3929 /* 3930 * Set the access and modification times given a path name; this 3931 * version does not follow links. 
3932 */ 3933 int 3934 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3935 register_t *retval) 3936 { 3937 /* { 3938 syscallarg(const char *) path; 3939 syscallarg(const struct timeval *) tptr; 3940 } */ 3941 3942 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3943 SCARG(uap, tptr), UIO_USERSPACE); 3944 } 3945 3946 int 3947 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3948 register_t *retval) 3949 { 3950 /* { 3951 syscallarg(int) fd; 3952 syscallarg(const char *) path; 3953 syscallarg(const struct timespec *) tptr; 3954 syscallarg(int) flag; 3955 } */ 3956 int follow; 3957 const struct timespec *tptr; 3958 int error; 3959 3960 tptr = SCARG(uap, tptr); 3961 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3962 3963 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3964 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3965 3966 return error; 3967 } 3968 3969 /* 3970 * Common routine to set access and modification times given a vnode. 3971 */ 3972 int 3973 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3974 const struct timespec *tptr, enum uio_seg seg) 3975 { 3976 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3977 } 3978 3979 int 3980 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3981 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3982 { 3983 struct vattr vattr; 3984 int error, dorele = 0; 3985 namei_simple_flags_t sflags; 3986 bool vanull, setbirthtime; 3987 struct timespec ts[2]; 3988 3989 KASSERT(l != NULL || fdat == AT_FDCWD); 3990 3991 /* 3992 * I have checked all callers and they pass either FOLLOW, 3993 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3994 * is 0. More to the point, they don't pass anything else. 3995 * Let's keep it that way at least until the namei interfaces 3996 * are fully sanitized. 
3997 */ 3998 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3999 sflags = (flag == FOLLOW) ? 4000 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 4001 4002 if (tptr == NULL) { 4003 vanull = true; 4004 nanotime(&ts[0]); 4005 ts[1] = ts[0]; 4006 } else { 4007 vanull = false; 4008 if (seg != UIO_SYSSPACE) { 4009 error = copyin(tptr, ts, sizeof (ts)); 4010 if (error != 0) 4011 return error; 4012 } else { 4013 ts[0] = tptr[0]; 4014 ts[1] = tptr[1]; 4015 } 4016 } 4017 4018 if (ts[0].tv_nsec == UTIME_NOW) { 4019 nanotime(&ts[0]); 4020 if (ts[1].tv_nsec == UTIME_NOW) { 4021 vanull = true; 4022 ts[1] = ts[0]; 4023 } 4024 } else if (ts[1].tv_nsec == UTIME_NOW) 4025 nanotime(&ts[1]); 4026 4027 if (vp == NULL) { 4028 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4029 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4030 if (error != 0) 4031 return error; 4032 dorele = 1; 4033 } 4034 4035 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4036 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4037 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4038 vattr_null(&vattr); 4039 4040 if (ts[0].tv_nsec != UTIME_OMIT) 4041 vattr.va_atime = ts[0]; 4042 4043 if (ts[1].tv_nsec != UTIME_OMIT) { 4044 vattr.va_mtime = ts[1]; 4045 if (setbirthtime) 4046 vattr.va_birthtime = ts[1]; 4047 } 4048 4049 if (vanull) 4050 vattr.va_vaflags |= VA_UTIMES_NULL; 4051 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4052 VOP_UNLOCK(vp); 4053 4054 if (dorele != 0) 4055 vrele(vp); 4056 4057 return error; 4058 } 4059 4060 int 4061 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4062 const struct timeval *tptr, enum uio_seg seg) 4063 { 4064 struct timespec ts[2]; 4065 struct timespec *tsptr = NULL; 4066 int error; 4067 4068 if (tptr != NULL) { 4069 struct timeval tv[2]; 4070 4071 if (seg != UIO_SYSSPACE) { 4072 error = copyin(tptr, tv, sizeof(tv)); 4073 if (error != 0) 4074 return error; 4075 tptr = tv; 4076 } 4077 4078 if ((tptr[0].tv_usec == UTIME_NOW) || 4079 
(tptr[0].tv_usec == UTIME_OMIT)) 4080 ts[0].tv_nsec = tptr[0].tv_usec; 4081 else { 4082 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4083 return EINVAL; 4084 4085 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4086 } 4087 4088 if ((tptr[1].tv_usec == UTIME_NOW) || 4089 (tptr[1].tv_usec == UTIME_OMIT)) 4090 ts[1].tv_nsec = tptr[1].tv_usec; 4091 else { 4092 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4093 return EINVAL; 4094 4095 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4096 } 4097 4098 tsptr = &ts[0]; 4099 } 4100 4101 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4102 } 4103 4104 /* 4105 * Truncate a file given its path name. 4106 */ 4107 /* ARGSUSED */ 4108 int 4109 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 4110 { 4111 /* { 4112 syscallarg(const char *) path; 4113 syscallarg(int) pad; 4114 syscallarg(off_t) length; 4115 } */ 4116 struct vnode *vp; 4117 struct vattr vattr; 4118 int error; 4119 4120 if (SCARG(uap, length) < 0) 4121 return EINVAL; 4122 4123 error = namei_simple_user(SCARG(uap, path), 4124 NSM_FOLLOW_TRYEMULROOT, &vp); 4125 if (error != 0) 4126 return (error); 4127 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4128 if (vp->v_type == VDIR) 4129 error = EISDIR; 4130 else if ((error = vn_writechk(vp)) == 0 && 4131 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4132 vattr_null(&vattr); 4133 vattr.va_size = SCARG(uap, length); 4134 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4135 } 4136 vput(vp); 4137 return (error); 4138 } 4139 4140 /* 4141 * Truncate a file given a file descriptor. 
4142 */ 4143 /* ARGSUSED */ 4144 int 4145 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 4146 { 4147 /* { 4148 syscallarg(int) fd; 4149 syscallarg(int) pad; 4150 syscallarg(off_t) length; 4151 } */ 4152 file_t *fp; 4153 int error, fd = SCARG(uap, fd); 4154 4155 fp = fd_getfile(fd); 4156 if (fp == NULL) 4157 return EBADF; 4158 if (fp->f_ops->fo_truncate == NULL) 4159 error = EOPNOTSUPP; 4160 else 4161 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length)); 4162 4163 fd_putfile(fd); 4164 return error; 4165 } 4166 4167 /* 4168 * Sync an open file. 4169 */ 4170 /* ARGSUSED */ 4171 int 4172 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4173 { 4174 /* { 4175 syscallarg(int) fd; 4176 } */ 4177 struct vnode *vp; 4178 file_t *fp; 4179 int error; 4180 4181 /* fd_getvnode() will use the descriptor for us */ 4182 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4183 return (error); 4184 vp = fp->f_vnode; 4185 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4186 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4187 VOP_UNLOCK(vp); 4188 fd_putfile(SCARG(uap, fd)); 4189 return (error); 4190 } 4191 4192 /* 4193 * Sync a range of file data. API modeled after that found in AIX. 4194 * 4195 * FDATASYNC indicates that we need only save enough metadata to be able 4196 * to re-read the written data. 
4197 */ 4198 /* ARGSUSED */ 4199 int 4200 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4201 { 4202 /* { 4203 syscallarg(int) fd; 4204 syscallarg(int) flags; 4205 syscallarg(off_t) start; 4206 syscallarg(off_t) length; 4207 } */ 4208 struct vnode *vp; 4209 file_t *fp; 4210 int flags, nflags; 4211 off_t s, e, len; 4212 int error; 4213 4214 /* fd_getvnode() will use the descriptor for us */ 4215 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4216 return (error); 4217 4218 if ((fp->f_flag & FWRITE) == 0) { 4219 error = EBADF; 4220 goto out; 4221 } 4222 4223 flags = SCARG(uap, flags); 4224 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4225 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4226 error = EINVAL; 4227 goto out; 4228 } 4229 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4230 if (flags & FDATASYNC) 4231 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4232 else 4233 nflags = FSYNC_WAIT; 4234 if (flags & FDISKSYNC) 4235 nflags |= FSYNC_CACHE; 4236 4237 len = SCARG(uap, length); 4238 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4239 if (len) { 4240 s = SCARG(uap, start); 4241 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4242 error = EINVAL; 4243 goto out; 4244 } 4245 e = s + len; 4246 KASSERT(s <= e); 4247 } else { 4248 e = 0; 4249 s = 0; 4250 } 4251 4252 vp = fp->f_vnode; 4253 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4254 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4255 VOP_UNLOCK(vp); 4256 out: 4257 fd_putfile(SCARG(uap, fd)); 4258 return (error); 4259 } 4260 4261 /* 4262 * Sync the data of an open file. 
4263 */ 4264 /* ARGSUSED */ 4265 int 4266 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4267 { 4268 /* { 4269 syscallarg(int) fd; 4270 } */ 4271 struct vnode *vp; 4272 file_t *fp; 4273 int error; 4274 4275 /* fd_getvnode() will use the descriptor for us */ 4276 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4277 return (error); 4278 vp = fp->f_vnode; 4279 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4280 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4281 VOP_UNLOCK(vp); 4282 fd_putfile(SCARG(uap, fd)); 4283 return (error); 4284 } 4285 4286 /* 4287 * Rename files, (standard) BSD semantics frontend. 4288 */ 4289 /* ARGSUSED */ 4290 int 4291 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4292 { 4293 /* { 4294 syscallarg(const char *) from; 4295 syscallarg(const char *) to; 4296 } */ 4297 4298 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4299 SCARG(uap, to), UIO_USERSPACE, 0)); 4300 } 4301 4302 int 4303 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4304 register_t *retval) 4305 { 4306 /* { 4307 syscallarg(int) fromfd; 4308 syscallarg(const char *) from; 4309 syscallarg(int) tofd; 4310 syscallarg(const char *) to; 4311 } */ 4312 4313 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4314 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4315 } 4316 4317 /* 4318 * Rename files, POSIX semantics frontend. 4319 */ 4320 /* ARGSUSED */ 4321 int 4322 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4323 { 4324 /* { 4325 syscallarg(const char *) from; 4326 syscallarg(const char *) to; 4327 } */ 4328 4329 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4330 SCARG(uap, to), UIO_USERSPACE, 1)); 4331 } 4332 4333 /* 4334 * Rename files. Source and destination must either both be directories, 4335 * or both not be directories. 
If target is a directory, it must be empty. 4336 * If `from' and `to' refer to the same object, the value of the `retain' 4337 * argument is used to determine whether `from' will be 4338 * 4339 * (retain == 0) deleted unless `from' and `to' refer to the same 4340 * object in the file system's name space (BSD). 4341 * (retain == 1) always retained (POSIX). 4342 * 4343 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4344 */ 4345 int 4346 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4347 { 4348 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4349 } 4350 4351 static int 4352 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4353 const char *to, enum uio_seg seg, int retain) 4354 { 4355 struct pathbuf *fpb, *tpb; 4356 struct nameidata fnd, tnd; 4357 struct vnode *fdvp, *fvp; 4358 struct vnode *tdvp, *tvp; 4359 struct mount *mp, *tmp; 4360 int error; 4361 4362 KASSERT(l != NULL || fromfd == AT_FDCWD); 4363 KASSERT(l != NULL || tofd == AT_FDCWD); 4364 4365 error = pathbuf_maybe_copyin(from, seg, &fpb); 4366 if (error) 4367 goto out0; 4368 KASSERT(fpb != NULL); 4369 4370 error = pathbuf_maybe_copyin(to, seg, &tpb); 4371 if (error) 4372 goto out1; 4373 KASSERT(tpb != NULL); 4374 4375 /* 4376 * Lookup from. 4377 * 4378 * XXX LOCKPARENT is wrong because we don't actually want it 4379 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4380 * insane, so for the time being we need to leave it like this. 4381 */ 4382 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4383 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4384 goto out2; 4385 4386 /* 4387 * Pull out the important results of the lookup, fdvp and fvp. 4388 * Of course, fvp is bogus because we're about to unlock fdvp. 
4389 */ 4390 fdvp = fnd.ni_dvp; 4391 fvp = fnd.ni_vp; 4392 mp = fdvp->v_mount; 4393 KASSERT(fdvp != NULL); 4394 KASSERT(fvp != NULL); 4395 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4396 /* 4397 * Bracket the operation with fstrans_start()/fstrans_done(). 4398 * 4399 * Inside the bracket this file system cannot be unmounted so 4400 * a vnode on this file system cannot change its v_mount. 4401 * A vnode on another file system may still change to dead mount. 4402 */ 4403 fstrans_start(mp); 4404 4405 /* 4406 * Make sure neither fdvp nor fvp is locked. 4407 */ 4408 if (fdvp != fvp) 4409 VOP_UNLOCK(fdvp); 4410 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4411 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4412 4413 /* 4414 * Reject renaming `.' and `..'. Can't do this until after 4415 * namei because we need namei's parsing to find the final 4416 * component name. (namei should just leave us with the final 4417 * component name and not look it up itself, but anyway...) 4418 * 4419 * This was here before because we used to relookup from 4420 * instead of to and relookup requires the caller to check 4421 * this, but now file systems may depend on this check, so we 4422 * must retain it until the file systems are all rototilled. 4423 */ 4424 if (((fnd.ni_cnd.cn_namelen == 1) && 4425 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4426 ((fnd.ni_cnd.cn_namelen == 2) && 4427 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4428 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4429 error = EINVAL; /* XXX EISDIR? */ 4430 goto abort0; 4431 } 4432 4433 /* 4434 * Lookup to. 4435 * 4436 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4437 * fvp here to decide whether to add CREATEDIR is a load of 4438 * bollocks because fvp might be the wrong node by now, since 4439 * fdvp is unlocked. 4440 * 4441 * XXX Why not pass CREATEDIR always? 4442 */ 4443 NDINIT(&tnd, RENAME, 4444 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4445 ((fvp->v_type == VDIR)? 
CREATEDIR : 0)), 4446 tpb); 4447 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4448 goto abort0; 4449 4450 /* 4451 * Pull out the important results of the lookup, tdvp and tvp. 4452 * Of course, tvp is bogus because we're about to unlock tdvp. 4453 */ 4454 tdvp = tnd.ni_dvp; 4455 tvp = tnd.ni_vp; 4456 KASSERT(tdvp != NULL); 4457 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4458 4459 if (fvp->v_type == VDIR) 4460 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4461 /* 4462 * Make sure neither tdvp nor tvp is locked. 4463 */ 4464 if (tdvp != tvp) 4465 VOP_UNLOCK(tdvp); 4466 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4467 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4468 4469 /* 4470 * Reject renaming onto `.' or `..'. relookup is unhappy with 4471 * these, which is why we must do this here. Once upon a time 4472 * we relooked up from instead of to, and consequently didn't 4473 * need this check, but now that we relookup to instead of 4474 * from, we need this; and we shall need it forever forward 4475 * until the VOP_RENAME protocol changes, because file systems 4476 * will no doubt begin to depend on this check. 4477 */ 4478 if ((tnd.ni_cnd.cn_namelen == 1) && (tnd.ni_cnd.cn_nameptr[0] == '.')) { 4479 error = EISDIR; 4480 goto abort1; 4481 } 4482 if ((tnd.ni_cnd.cn_namelen == 2) && 4483 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4484 (tnd.ni_cnd.cn_nameptr[1] == '.')) { 4485 error = EINVAL; 4486 goto abort1; 4487 } 4488 4489 /* 4490 * Make sure the mount points match. Although we don't hold 4491 * any vnode locks, the v_mount on fdvp file system are stable. 4492 * 4493 * Unmounting another file system at an inopportune moment may 4494 * cause tdvp to disappear and change its v_mount to dead. 4495 * 4496 * So in either case different v_mount means cross-device rename. 
4497 */ 4498 KASSERT(mp != NULL); 4499 tmp = tdvp->v_mount; 4500 4501 if (mp != tmp) { 4502 error = EXDEV; 4503 goto abort1; 4504 } 4505 4506 /* 4507 * Take the vfs rename lock to avoid cross-directory screw cases. 4508 * Nothing is locked currently, so taking this lock is safe. 4509 */ 4510 error = VFS_RENAMELOCK_ENTER(mp); 4511 if (error) 4512 goto abort1; 4513 4514 /* 4515 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4516 * and nothing is locked except for the vfs rename lock. 4517 * 4518 * The next step is a little rain dance to conform to the 4519 * insane lock protocol, even though it does nothing to ward 4520 * off race conditions. 4521 * 4522 * We need tdvp and tvp to be locked. However, because we have 4523 * unlocked tdvp in order to hold no locks while we take the 4524 * vfs rename lock, tvp may be wrong here, and we can't safely 4525 * lock it even if the sensible file systems will just unlock 4526 * it straight away. Consequently, we must lock tdvp and then 4527 * relookup tvp to get it locked. 4528 * 4529 * Finally, because the VOP_RENAME protocol is brain-damaged 4530 * and various file systems insanely depend on the semantics of 4531 * this brain damage, the lookup of to must be the last lookup 4532 * before VOP_RENAME. 4533 */ 4534 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4535 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4536 if (error) 4537 goto abort2; 4538 4539 /* 4540 * Drop the old tvp and pick up the new one -- which might be 4541 * the same, but that doesn't matter to us. After this, tdvp 4542 * and tvp should both be locked. 4543 */ 4544 if (tvp != NULL) 4545 vrele(tvp); 4546 tvp = tnd.ni_vp; 4547 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4548 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4549 4550 /* 4551 * The old do_sys_rename had various consistency checks here 4552 * involving fvp and tvp. 
fvp is bogus already here, and tvp 4553 * will become bogus soon in any sensible file system, so the 4554 * only purpose in putting these checks here is to give lip 4555 * service to these screw cases and to acknowledge that they 4556 * exist, not actually to handle them, but here you go 4557 * anyway... 4558 */ 4559 4560 /* 4561 * Acknowledge that directories and non-directories aren't 4562 * supposed to mix. 4563 */ 4564 if (tvp != NULL) { 4565 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4566 error = ENOTDIR; 4567 goto abort3; 4568 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4569 error = EISDIR; 4570 goto abort3; 4571 } 4572 } 4573 4574 /* 4575 * Acknowledge some random screw case, among the dozens that 4576 * might arise. 4577 */ 4578 if (fvp == tdvp) { 4579 error = EINVAL; 4580 goto abort3; 4581 } 4582 4583 /* 4584 * Acknowledge that POSIX has a wacky screw case. 4585 * 4586 * XXX Eventually the retain flag needs to be passed on to 4587 * VOP_RENAME. 4588 */ 4589 if (fvp == tvp) { 4590 if (retain) { 4591 error = 0; 4592 goto abort3; 4593 } else if ((fdvp == tdvp) && 4594 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4595 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4596 fnd.ni_cnd.cn_namelen))) { 4597 error = 0; 4598 goto abort3; 4599 } 4600 } 4601 4602 /* 4603 * Make sure veriexec can screw us up. (But a race can screw 4604 * up veriexec, of course -- remember, fvp and (soon) tvp are 4605 * bogus.) 
4606 */ 4607 #if NVERIEXEC > 0 4608 { 4609 char *f1, *f2; 4610 size_t f1_len; 4611 size_t f2_len; 4612 4613 f1_len = fnd.ni_cnd.cn_namelen + 1; 4614 f1 = kmem_alloc(f1_len, KM_SLEEP); 4615 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4616 4617 f2_len = tnd.ni_cnd.cn_namelen + 1; 4618 f2 = kmem_alloc(f2_len, KM_SLEEP); 4619 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4620 4621 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4622 4623 kmem_free(f1, f1_len); 4624 kmem_free(f2, f2_len); 4625 4626 if (error) 4627 goto abort3; 4628 } 4629 #endif /* NVERIEXEC > 0 */ 4630 4631 /* 4632 * All ready. Incant the rename vop. 4633 */ 4634 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4635 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4636 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4637 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4638 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4639 4640 /* 4641 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4642 * tdvp and tvp. But we can't assert any of that. 4643 */ 4644 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4645 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4646 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4647 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4648 4649 /* 4650 * So all we have left to do is to drop the rename lock and 4651 * destroy the pathbufs. 
4652 */ 4653 VFS_RENAMELOCK_EXIT(mp); 4654 fstrans_done(mp); 4655 goto out2; 4656 4657 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4658 VOP_UNLOCK(tvp); 4659 abort2: VOP_UNLOCK(tdvp); 4660 VFS_RENAMELOCK_EXIT(mp); 4661 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4662 vrele(tdvp); 4663 if (tvp != NULL) 4664 vrele(tvp); 4665 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4666 vrele(fdvp); 4667 vrele(fvp); 4668 fstrans_done(mp); 4669 out2: pathbuf_destroy(tpb); 4670 out1: pathbuf_destroy(fpb); 4671 out0: return error; 4672 } 4673 4674 /* 4675 * Make a directory file. 4676 */ 4677 /* ARGSUSED */ 4678 int 4679 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4680 { 4681 /* { 4682 syscallarg(const char *) path; 4683 syscallarg(int) mode; 4684 } */ 4685 4686 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4687 SCARG(uap, mode), UIO_USERSPACE); 4688 } 4689 4690 int 4691 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4692 register_t *retval) 4693 { 4694 /* { 4695 syscallarg(int) fd; 4696 syscallarg(const char *) path; 4697 syscallarg(int) mode; 4698 } */ 4699 4700 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4701 SCARG(uap, mode), UIO_USERSPACE); 4702 } 4703 4704 4705 int 4706 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4707 { 4708 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4709 } 4710 4711 static int 4712 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4713 enum uio_seg seg) 4714 { 4715 struct proc *p = curlwp->l_proc; 4716 struct vnode *vp; 4717 struct vattr vattr; 4718 int error; 4719 struct pathbuf *pb; 4720 struct nameidata nd; 4721 4722 KASSERT(l != NULL || fdat == AT_FDCWD); 4723 4724 /* XXX bollocks, should pass in a pathbuf */ 4725 error = pathbuf_maybe_copyin(path, seg, &pb); 4726 if (error) { 4727 return error; 4728 } 4729 4730 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4731 4732 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4733 
pathbuf_destroy(pb); 4734 return (error); 4735 } 4736 vp = nd.ni_vp; 4737 if (vp != NULL) { 4738 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4739 if (nd.ni_dvp == vp) 4740 vrele(nd.ni_dvp); 4741 else 4742 vput(nd.ni_dvp); 4743 vrele(vp); 4744 pathbuf_destroy(pb); 4745 return (EEXIST); 4746 } 4747 vattr_null(&vattr); 4748 vattr.va_type = VDIR; 4749 /* We will read cwdi->cwdi_cmask unlocked. */ 4750 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4751 nd.ni_cnd.cn_flags |= WILLBEDIR; 4752 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4753 if (!error) 4754 vrele(nd.ni_vp); 4755 vput(nd.ni_dvp); 4756 pathbuf_destroy(pb); 4757 return (error); 4758 } 4759 4760 /* 4761 * Remove a directory file. 4762 */ 4763 /* ARGSUSED */ 4764 int 4765 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4766 { 4767 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4768 AT_REMOVEDIR, UIO_USERSPACE); 4769 } 4770 4771 /* 4772 * Read a block of directory entries in a file system independent format. 4773 */ 4774 int 4775 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4776 { 4777 /* { 4778 syscallarg(int) fd; 4779 syscallarg(char *) buf; 4780 syscallarg(size_t) count; 4781 } */ 4782 file_t *fp; 4783 int error, done; 4784 4785 /* fd_getvnode() will use the descriptor for us */ 4786 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4787 return (error); 4788 if ((fp->f_flag & FREAD) == 0) { 4789 error = EBADF; 4790 goto out; 4791 } 4792 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4793 SCARG(uap, count), &done, l, 0, 0); 4794 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4795 *retval = done; 4796 out: 4797 fd_putfile(SCARG(uap, fd)); 4798 return (error); 4799 } 4800 4801 /* 4802 * Set the mode mask for creation of filesystem nodes. 
4803 */ 4804 int 4805 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4806 { 4807 /* { 4808 syscallarg(mode_t) newmask; 4809 } */ 4810 4811 /* 4812 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4813 * serialization with those reads is required. It's important to 4814 * return a coherent answer for the caller of umask() though, and 4815 * the atomic operation accomplishes that. 4816 */ 4817 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4818 SCARG(uap, newmask) & ALLPERMS); 4819 4820 return (0); 4821 } 4822 4823 int 4824 dorevoke(struct vnode *vp, kauth_cred_t cred) 4825 { 4826 struct vattr vattr; 4827 int error, fs_decision; 4828 4829 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4830 error = VOP_GETATTR(vp, &vattr, cred); 4831 VOP_UNLOCK(vp); 4832 if (error != 0) 4833 return error; 4834 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4835 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4836 fs_decision); 4837 if (!error) 4838 VOP_REVOKE(vp, REVOKEALL); 4839 return (error); 4840 } 4841 4842 /* 4843 * Void all references to file by ripping underlying filesystem 4844 * away from vnode. 4845 */ 4846 /* ARGSUSED */ 4847 int 4848 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4849 { 4850 /* { 4851 syscallarg(const char *) path; 4852 } */ 4853 struct vnode *vp; 4854 int error; 4855 4856 error = namei_simple_user(SCARG(uap, path), 4857 NSM_FOLLOW_TRYEMULROOT, &vp); 4858 if (error != 0) 4859 return (error); 4860 error = dorevoke(vp, l->l_cred); 4861 vrele(vp); 4862 return (error); 4863 } 4864 4865 /* 4866 * Allocate backing store for a file, filling a hole without having to 4867 * explicitly write anything out. 
4868 */ 4869 /* ARGSUSED */ 4870 int 4871 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4872 register_t *retval) 4873 { 4874 /* { 4875 syscallarg(int) fd; 4876 syscallarg(off_t) pos; 4877 syscallarg(off_t) len; 4878 } */ 4879 int fd; 4880 off_t pos, len; 4881 struct file *fp; 4882 struct vnode *vp; 4883 int error; 4884 4885 fd = SCARG(uap, fd); 4886 pos = SCARG(uap, pos); 4887 len = SCARG(uap, len); 4888 4889 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4890 *retval = EINVAL; 4891 return 0; 4892 } 4893 4894 error = fd_getvnode(fd, &fp); 4895 if (error) { 4896 *retval = error; 4897 return 0; 4898 } 4899 if ((fp->f_flag & FWRITE) == 0) { 4900 error = EBADF; 4901 goto fail; 4902 } 4903 vp = fp->f_vnode; 4904 4905 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4906 if (vp->v_type == VDIR) { 4907 error = EISDIR; 4908 } else { 4909 error = VOP_FALLOCATE(vp, pos, len); 4910 } 4911 VOP_UNLOCK(vp); 4912 4913 fail: 4914 fd_putfile(fd); 4915 *retval = error; 4916 return 0; 4917 } 4918 4919 /* 4920 * Deallocate backing store for a file, creating a hole. Also used for 4921 * invoking TRIM on disks. 
4922 */ 4923 /* ARGSUSED */ 4924 int 4925 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 4926 register_t *retval) 4927 { 4928 /* { 4929 syscallarg(int) fd; 4930 syscallarg(off_t) pos; 4931 syscallarg(off_t) len; 4932 } */ 4933 int fd; 4934 off_t pos, len; 4935 struct file *fp; 4936 struct vnode *vp; 4937 int error; 4938 4939 fd = SCARG(uap, fd); 4940 pos = SCARG(uap, pos); 4941 len = SCARG(uap, len); 4942 4943 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4944 return EINVAL; 4945 } 4946 4947 error = fd_getvnode(fd, &fp); 4948 if (error) { 4949 return error; 4950 } 4951 if ((fp->f_flag & FWRITE) == 0) { 4952 error = EBADF; 4953 goto fail; 4954 } 4955 vp = fp->f_vnode; 4956 4957 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4958 if (vp->v_type == VDIR) { 4959 error = EISDIR; 4960 } else { 4961 error = VOP_FDISCARD(vp, pos, len); 4962 } 4963 VOP_UNLOCK(vp); 4964 4965 fail: 4966 fd_putfile(fd); 4967 return error; 4968 } 4969