1 /* $NetBSD: vfs_syscalls.c,v 1.570 2024/12/07 02:23:09 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009, 2019, 2020, 2023 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.570 2024/12/07 02:23:09 riastradh Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/types.h> 82 83 #include <sys/atomic.h> 84 #include <sys/buf.h> 85 #include <sys/compat_stub.h> 86 #include <sys/dirent.h> 87 #include <sys/event.h> 88 #include <sys/extattr.h> 89 #include <sys/fcntl.h> 90 #include <sys/file.h> 91 #ifdef FILEASSOC 92 #include <sys/fileassoc.h> 93 #endif /* FILEASSOC */ 94 #include <sys/filedesc.h> 95 #include <sys/fstrans.h> 96 #include <sys/kauth.h> 97 #include <sys/kernel.h> 98 #include <sys/kmem.h> 99 #include <sys/ktrace.h> 100 #include <sys/module.h> 101 #include <sys/mount.h> 102 #include <sys/namei.h> 103 #include <sys/proc.h> 104 #include <sys/quota.h> 105 #include <sys/quotactl.h> 106 #include <sys/stat.h> 107 #include <sys/syscallargs.h> 108 #include <sys/sysctl.h> 109 #include <sys/systm.h> 110 #include <sys/uio.h> 111 #include <sys/verified_exec.h> 112 #include <sys/vfs_syscalls.h> 113 #include <sys/vnode.h> 114 115 #include <miscfs/genfs/genfs.h> 116 #include <miscfs/specfs/specdev.h> 117 118 #include <nfs/nfs.h> 119 #include <nfs/nfs_var.h> 120 #include <nfs/nfsproto.h> 121 #include <nfs/rpcv2.h> 122 123 /* XXX this shouldn't be here */ 124 #ifndef OFF_T_MAX 125 #define OFF_T_MAX __type_max(off_t) 126 #endif 127 128 static int change_flags(struct vnode *, u_long, struct lwp *); 129 static int change_mode(struct vnode *, int, struct lwp *); 130 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 131 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 132 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 133 enum uio_seg); 134 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 135 
static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 136 enum uio_seg); 137 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 138 enum uio_seg, int); 139 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 140 size_t, register_t *); 141 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 142 143 static int fd_nameiat(struct lwp *, int, struct nameidata *); 144 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 145 namei_simple_flags_t, struct vnode **); 146 147 /* 148 * This table is used to maintain compatibility with 4.3BSD 149 * and NetBSD 0.9 mount syscalls - and possibly other systems. 150 * Note, the order is important! 151 * 152 * Do not modify this table. It should only contain filesystems 153 * supported by NetBSD 0.9 and 4.3BSD. 154 */ 155 const char * const mountcompatnames[] = { 156 NULL, /* 0 = MOUNT_NONE */ 157 MOUNT_FFS, /* 1 = MOUNT_UFS */ 158 MOUNT_NFS, /* 2 */ 159 MOUNT_MFS, /* 3 */ 160 MOUNT_MSDOS, /* 4 */ 161 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 162 MOUNT_FDESC, /* 6 */ 163 MOUNT_KERNFS, /* 7 */ 164 NULL, /* 8 = MOUNT_DEVFS */ 165 MOUNT_AFS, /* 9 */ 166 }; 167 168 const u_int nmountcompatnames = __arraycount(mountcompatnames); 169 170 /* 171 * Filter event method for EVFILT_FS. 
172 */ 173 static struct klist fs_klist; 174 static kmutex_t fs_klist_lock; 175 176 CTASSERT((NOTE_SUBMIT & VQ_MOUNT) == 0); 177 CTASSERT((NOTE_SUBMIT & VQ_UNMOUNT) == 0); 178 179 void 180 vfs_evfilt_fs_init(void) 181 { 182 183 klist_init(&fs_klist); 184 mutex_init(&fs_klist_lock, MUTEX_DEFAULT, IPL_NONE); 185 } 186 187 static int 188 filt_fsattach(struct knote *kn) 189 { 190 191 mutex_enter(&fs_klist_lock); 192 kn->kn_flags |= EV_CLEAR; 193 klist_insert(&fs_klist, kn); 194 mutex_exit(&fs_klist_lock); 195 196 return 0; 197 } 198 199 static void 200 filt_fsdetach(struct knote *kn) 201 { 202 203 mutex_enter(&fs_klist_lock); 204 klist_remove(&fs_klist, kn); 205 mutex_exit(&fs_klist_lock); 206 } 207 208 static int 209 filt_fs(struct knote *kn, long hint) 210 { 211 int rv; 212 213 if (hint & NOTE_SUBMIT) { 214 KASSERT(mutex_owned(&fs_klist_lock)); 215 kn->kn_fflags |= hint & ~NOTE_SUBMIT; 216 } else { 217 mutex_enter(&fs_klist_lock); 218 } 219 220 rv = (kn->kn_fflags != 0); 221 222 if ((hint & NOTE_SUBMIT) == 0) { 223 mutex_exit(&fs_klist_lock); 224 } 225 226 return rv; 227 } 228 229 /* referenced in kern_event.c */ 230 const struct filterops fs_filtops = { 231 .f_flags = FILTEROP_MPSAFE, 232 .f_attach = filt_fsattach, 233 .f_detach = filt_fsdetach, 234 .f_event = filt_fs, 235 }; 236 237 static int 238 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 239 { 240 file_t *dfp; 241 int error; 242 const char *path = pathbuf_stringcopy_get(ndp->ni_pathbuf); 243 244 if (fdat != AT_FDCWD && path[0] != '/') { 245 if ((error = fd_getvnode(fdat, &dfp)) != 0) 246 goto out; 247 248 NDAT(ndp, dfp->f_vnode); 249 } 250 251 error = namei(ndp); 252 253 if (fdat != AT_FDCWD && path[0] != '/') 254 fd_putfile(fdat); 255 out: 256 pathbuf_stringcopy_put(ndp->ni_pathbuf, path); 257 return error; 258 } 259 260 static int 261 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 262 namei_simple_flags_t sflags, struct vnode **vp_ret) 263 { 264 file_t *dfp; 265 struct vnode 
*dvp; 266 int error; 267 struct pathbuf *pb; 268 const char *p; 269 270 error = pathbuf_copyin(path, &pb); 271 if (error) { 272 return error; 273 } 274 p = pathbuf_stringcopy_get(pb); 275 276 if (fdat != AT_FDCWD && p[0] != '/') { 277 if ((error = fd_getvnode(fdat, &dfp)) != 0) 278 goto out; 279 280 dvp = dfp->f_vnode; 281 } else { 282 dvp = NULL; 283 } 284 285 error = nameiat_simple(dvp, pb, sflags, vp_ret); 286 287 if (fdat != AT_FDCWD && p[0] != '/') 288 fd_putfile(fdat); 289 290 out: 291 pathbuf_stringcopy_put(pb, p); 292 pathbuf_destroy(pb); 293 294 return error; 295 } 296 297 static int 298 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 299 { 300 int error; 301 302 fp->f_flag = flags & FMASK; 303 fp->f_type = DTYPE_VNODE; 304 fp->f_ops = &vnops; 305 fp->f_vnode = vp; 306 307 if (flags & (O_EXLOCK | O_SHLOCK)) { 308 struct flock lf; 309 int type; 310 311 lf.l_whence = SEEK_SET; 312 lf.l_start = 0; 313 lf.l_len = 0; 314 if (flags & O_EXLOCK) 315 lf.l_type = F_WRLCK; 316 else 317 lf.l_type = F_RDLCK; 318 type = F_FLOCK; 319 if ((flags & FNONBLOCK) == 0) 320 type |= F_WAIT; 321 VOP_UNLOCK(vp); 322 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 323 if (error) { 324 (void) vn_close(vp, fp->f_flag, fp->f_cred); 325 fd_abort(l->l_proc, fp, indx); 326 return error; 327 } 328 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 329 atomic_or_uint(&fp->f_flag, FHASLOCK); 330 } 331 if (flags & O_CLOEXEC) 332 fd_set_exclose(l, indx, true); 333 return 0; 334 } 335 336 static int 337 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 338 void *data, size_t *data_len) 339 { 340 struct mount *mp; 341 int error = 0, saved_flags; 342 343 mp = vp->v_mount; 344 saved_flags = mp->mnt_flag; 345 346 /* We can operate only on VV_ROOT nodes. */ 347 if ((vp->v_vflag & VV_ROOT) == 0) { 348 error = EINVAL; 349 goto out; 350 } 351 352 /* 353 * We only allow the filesystem to be reloaded if it 354 * is currently mounted read-only. 
Additionally, we 355 * prevent read-write to read-only downgrades. 356 */ 357 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 358 (mp->mnt_flag & MNT_RDONLY) == 0 && 359 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 360 error = EOPNOTSUPP; /* Needs translation */ 361 goto out; 362 } 363 364 /* 365 * Enabling MNT_UNION requires a covered mountpoint and 366 * must not happen on the root mount. 367 */ 368 if ((flags & MNT_UNION) != 0 && mp->mnt_vnodecovered == NULLVP) { 369 error = EOPNOTSUPP; 370 goto out; 371 } 372 373 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 374 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 375 if (error) 376 goto out; 377 378 error = vfs_suspend(mp, 0); 379 if (error) 380 goto out; 381 382 mutex_enter(mp->mnt_updating); 383 384 mp->mnt_flag &= ~MNT_OP_FLAGS; 385 mp->mnt_flag |= flags & MNT_OP_FLAGS; 386 387 /* 388 * Set the mount level flags. 389 */ 390 if ((flags & MNT_RDONLY) != (mp->mnt_flag & MNT_RDONLY)) { 391 if ((flags & MNT_RDONLY)) 392 mp->mnt_iflag |= IMNT_WANTRDONLY; 393 else 394 mp->mnt_iflag |= IMNT_WANTRDWR; 395 } 396 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 397 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 398 if ((mp->mnt_iflag & IMNT_WANTRDONLY)) 399 mp->mnt_flag &= ~MNT_RDONLY; 400 401 error = VFS_MOUNT(mp, path, data, data_len); 402 403 if (error && data != NULL) { 404 int error2; 405 406 /* 407 * Update failed; let's try and see if it was an 408 * export request. For compat with 3.0 and earlier. 409 */ 410 error2 = vfs_hooks_reexport(mp, path, data); 411 412 /* 413 * Only update error code if the export request was 414 * understood but some problem occurred while 415 * processing it. 
416 */ 417 if (error2 != EJUSTRETURN) 418 error = error2; 419 } 420 421 if (error == 0 && (mp->mnt_iflag & IMNT_WANTRDONLY)) 422 mp->mnt_flag |= MNT_RDONLY; 423 if (error) 424 mp->mnt_flag = saved_flags; 425 mp->mnt_flag &= ~MNT_OP_FLAGS; 426 mp->mnt_iflag &= ~(IMNT_WANTRDONLY | IMNT_WANTRDWR); 427 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 428 if ((mp->mnt_iflag & IMNT_ONWORKLIST) == 0) 429 vfs_syncer_add_to_worklist(mp); 430 } else { 431 if ((mp->mnt_iflag & IMNT_ONWORKLIST) != 0) 432 vfs_syncer_remove_from_worklist(mp); 433 } 434 mutex_exit(mp->mnt_updating); 435 vfs_resume(mp); 436 437 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 438 (flags & MNT_EXTATTR)) { 439 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 440 NULL, 0, NULL) != 0) { 441 printf("%s: failed to start extattr, error = %d", 442 mp->mnt_stat.f_mntonname, error); 443 mp->mnt_flag &= ~MNT_EXTATTR; 444 } 445 } 446 447 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 448 !(flags & MNT_EXTATTR)) { 449 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 450 NULL, 0, NULL) != 0) { 451 printf("%s: failed to stop extattr, error = %d", 452 mp->mnt_stat.f_mntonname, error); 453 mp->mnt_flag |= MNT_RDONLY; 454 } 455 } 456 out: 457 return (error); 458 } 459 460 static int 461 mount_get_vfsops(const char *fstype, enum uio_seg type_seg, 462 struct vfsops **vfsops) 463 { 464 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 465 int error; 466 467 if (type_seg == UIO_USERSPACE) { 468 /* Copy file-system type from userspace. */ 469 error = copyinstr(fstype, fstypename, sizeof(fstypename), 470 NULL); 471 } else { 472 error = copystr(fstype, fstypename, sizeof(fstypename), NULL); 473 KASSERT(error == 0); 474 } 475 476 if (error) { 477 /* 478 * Historically, filesystem types were identified by numbers. 479 * If we get an integer for the filesystem type instead of a 480 * string, we check to see if it matches one of the historic 481 * filesystem types. 
482 */ 483 u_long fsindex = (u_long)fstype; 484 if (fsindex >= nmountcompatnames || 485 mountcompatnames[fsindex] == NULL) 486 return ENODEV; 487 strlcpy(fstypename, mountcompatnames[fsindex], 488 sizeof(fstypename)); 489 } 490 491 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 492 if (strcmp(fstypename, "ufs") == 0) 493 fstypename[0] = 'f'; 494 495 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 496 return 0; 497 498 /* If we can autoload a vfs module, try again */ 499 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 500 501 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 502 return 0; 503 504 return ENODEV; 505 } 506 507 static int 508 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 509 void *data, size_t *data_len) 510 { 511 struct mount *mp; 512 int error; 513 514 /* If MNT_GETARGS is specified, it should be the only flag. */ 515 if (flags & ~MNT_GETARGS) 516 return EINVAL; 517 518 mp = vp->v_mount; 519 520 /* XXX: probably some notion of "can see" here if we want isolation. 
*/ 521 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 522 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 523 if (error) 524 return error; 525 526 if ((vp->v_vflag & VV_ROOT) == 0) 527 return EINVAL; 528 529 if (vfs_busy(mp)) 530 return EPERM; 531 532 mutex_enter(mp->mnt_updating); 533 mp->mnt_flag &= ~MNT_OP_FLAGS; 534 mp->mnt_flag |= MNT_GETARGS; 535 error = VFS_MOUNT(mp, path, data, data_len); 536 mp->mnt_flag &= ~MNT_OP_FLAGS; 537 mutex_exit(mp->mnt_updating); 538 539 vfs_unbusy(mp); 540 return (error); 541 } 542 543 int 544 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, 545 register_t *retval) 546 { 547 /* { 548 syscallarg(const char *) type; 549 syscallarg(const char *) path; 550 syscallarg(int) flags; 551 syscallarg(void *) data; 552 syscallarg(size_t) data_len; 553 } */ 554 555 return do_sys_mount(l, SCARG(uap, type), UIO_USERSPACE, 556 SCARG(uap, path), SCARG(uap, flags), 557 SCARG(uap, data), UIO_USERSPACE, SCARG(uap, data_len), 558 retval); 559 } 560 561 int 562 do_sys_mount(struct lwp *l, const char *type, enum uio_seg type_seg, 563 const char *path, int flags, 564 void *data, enum uio_seg data_seg, size_t data_len, 565 register_t *retval) 566 { 567 struct vfsops *vfsops = NULL; /* XXX gcc4.8 */ 568 struct vnode *vp; 569 void *data_buf = data; 570 bool vfsopsrele = false; 571 size_t alloc_sz = 0; 572 int error; 573 574 /* 575 * Get vnode to be covered 576 */ 577 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 578 if (error != 0) { 579 vp = NULL; 580 goto done; 581 } 582 583 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 584 vfsops = vp->v_mount->mnt_op; 585 } else { 586 /* 'type' is userspace */ 587 error = mount_get_vfsops(type, type_seg, &vfsops); 588 if (error != 0) 589 goto done; 590 vfsopsrele = true; 591 } 592 593 /* 594 * We allow data to be NULL, even for userspace. Some fs's don't need 595 * it. The others will handle NULL. 
596 */ 597 if (data != NULL && data_seg == UIO_USERSPACE) { 598 if (data_len == 0) { 599 /* No length supplied, use default for filesystem */ 600 data_len = vfsops->vfs_min_mount_data; 601 602 /* 603 * Hopefully a longer buffer won't make copyin() fail. 604 * For compatibility with 3.0 and earlier. 605 */ 606 if (flags & MNT_UPDATE 607 && data_len < sizeof (struct mnt_export_args30)) 608 data_len = sizeof (struct mnt_export_args30); 609 } 610 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 611 error = EINVAL; 612 goto done; 613 } 614 alloc_sz = data_len; 615 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 616 617 /* NFS needs the buffer even for mnt_getargs .... */ 618 error = copyin(data, data_buf, data_len); 619 if (error != 0) 620 goto done; 621 } 622 623 if (flags & MNT_GETARGS) { 624 if (data_len == 0) { 625 error = EINVAL; 626 goto done; 627 } 628 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 629 if (error != 0) 630 goto done; 631 if (data_seg == UIO_USERSPACE) 632 error = copyout(data_buf, data, data_len); 633 *retval = data_len; 634 } else if (flags & MNT_UPDATE) { 635 error = mount_update(l, vp, path, flags, data_buf, &data_len); 636 } else { 637 /* Locking is handled internally in mount_domount(). */ 638 KASSERT(vfsopsrele == true); 639 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 640 &data_len); 641 vfsopsrele = false; 642 } 643 if (!error) { 644 mutex_enter(&fs_klist_lock); 645 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_MOUNT); 646 mutex_exit(&fs_klist_lock); 647 } 648 649 done: 650 if (vfsopsrele) 651 vfs_delref(vfsops); 652 if (vp != NULL) { 653 vrele(vp); 654 } 655 if (data_buf != data) 656 kmem_free(data_buf, alloc_sz); 657 return (error); 658 } 659 660 /* 661 * Unmount a file system. 662 * 663 * Note: unmount takes a path to the vnode mounted on as argument, 664 * not special file (as before). 
665 */ 666 /* ARGSUSED */ 667 int 668 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, 669 register_t *retval) 670 { 671 /* { 672 syscallarg(const char *) path; 673 syscallarg(int) flags; 674 } */ 675 struct vnode *vp; 676 struct mount *mp; 677 int error; 678 struct pathbuf *pb; 679 struct nameidata nd; 680 681 error = pathbuf_copyin(SCARG(uap, path), &pb); 682 if (error) { 683 return error; 684 } 685 686 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 687 if ((error = namei(&nd)) != 0) { 688 pathbuf_destroy(pb); 689 return error; 690 } 691 vp = nd.ni_vp; 692 pathbuf_destroy(pb); 693 694 mp = vp->v_mount; 695 vfs_ref(mp); 696 VOP_UNLOCK(vp); 697 698 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 699 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 700 if (error) { 701 vrele(vp); 702 vfs_rele(mp); 703 return (error); 704 } 705 706 /* 707 * Don't allow unmounting the root file system. 708 */ 709 if (mp->mnt_flag & MNT_ROOTFS) { 710 vrele(vp); 711 vfs_rele(mp); 712 return (EINVAL); 713 } 714 715 /* 716 * Must be the root of the filesystem 717 */ 718 if ((vp->v_vflag & VV_ROOT) == 0) { 719 vrele(vp); 720 vfs_rele(mp); 721 return (EINVAL); 722 } 723 724 vrele(vp); 725 error = dounmount(mp, SCARG(uap, flags), l); 726 vfs_rele(mp); 727 if (!error) { 728 mutex_enter(&fs_klist_lock); 729 KNOTE(&fs_klist, NOTE_SUBMIT | VQ_UNMOUNT); 730 mutex_exit(&fs_klist_lock); 731 } 732 return error; 733 } 734 735 /* 736 * Sync each mounted filesystem. 
737 */ 738 #ifdef DEBUG 739 int syncprt = 0; 740 struct ctldebug debug0 = { "syncprt", &syncprt }; 741 #endif 742 743 void 744 do_sys_sync(struct lwp *l) 745 { 746 mount_iterator_t *iter; 747 struct mount *mp; 748 int asyncflag; 749 750 mountlist_iterator_init(&iter); 751 while ((mp = mountlist_iterator_next(iter)) != NULL) { 752 mutex_enter(mp->mnt_updating); 753 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 754 /* 755 * Temporarily clear the MNT_ASYNC flags so that 756 * bwrite() doesnt convert the sync writes to 757 * delayed writes. 758 */ 759 asyncflag = mp->mnt_flag & MNT_ASYNC; 760 mp->mnt_flag &= ~MNT_ASYNC; 761 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 762 mp->mnt_flag |= asyncflag; 763 } 764 mutex_exit(mp->mnt_updating); 765 } 766 mountlist_iterator_destroy(iter); 767 #ifdef DEBUG 768 if (syncprt) 769 vfs_bufstats(); 770 #endif /* DEBUG */ 771 } 772 773 static bool 774 sync_vnode_filter(void *cookie, vnode_t *vp) 775 { 776 777 if (vp->v_numoutput > 0) { 778 ++*(int *)cookie; 779 } 780 return false; 781 } 782 783 int 784 vfs_syncwait(void) 785 { 786 int nbusy, nbusy_prev, iter; 787 struct vnode_iterator *vniter; 788 mount_iterator_t *mpiter; 789 struct mount *mp; 790 791 for (nbusy_prev = 0, iter = 0; iter < 20;) { 792 nbusy = 0; 793 mountlist_iterator_init(&mpiter); 794 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 795 vnode_t *vp __diagused; 796 vfs_vnode_iterator_init(mp, &vniter); 797 vp = vfs_vnode_iterator_next(vniter, 798 sync_vnode_filter, &nbusy); 799 KASSERT(vp == NULL); 800 vfs_vnode_iterator_destroy(vniter); 801 } 802 mountlist_iterator_destroy(mpiter); 803 804 if (nbusy == 0) 805 break; 806 if (nbusy_prev == 0) 807 nbusy_prev = nbusy; 808 printf("%d ", nbusy); 809 kpause("syncwait", false, MAX(1, hz / 25 * iter), NULL); 810 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 811 iter++; 812 else 813 nbusy_prev = nbusy; 814 } 815 816 if (nbusy) { 817 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 818 printf("giving up\nPrinting vnodes for 
busy buffers\n"); 819 mountlist_iterator_init(&mpiter); 820 while ((mp = mountlist_iterator_next(mpiter)) != NULL) { 821 vnode_t *vp; 822 vfs_vnode_iterator_init(mp, &vniter); 823 vp = vfs_vnode_iterator_next(vniter, 824 NULL, NULL); 825 mutex_enter(vp->v_interlock); 826 if (vp->v_numoutput > 0) 827 vprint(NULL, vp); 828 mutex_exit(vp->v_interlock); 829 vrele(vp); 830 vfs_vnode_iterator_destroy(vniter); 831 } 832 mountlist_iterator_destroy(mpiter); 833 #endif 834 } 835 836 return nbusy; 837 } 838 839 /* ARGSUSED */ 840 int 841 sys_sync(struct lwp *l, const void *v, register_t *retval) 842 { 843 844 do_sys_sync(l); 845 return (0); 846 } 847 848 /* 849 * Access or change filesystem quotas. 850 * 851 * (this is really 14 different calls bundled into one) 852 */ 853 854 static int 855 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 856 { 857 struct quotastat info_k; 858 int error; 859 860 /* ensure any padding bytes are cleared */ 861 memset(&info_k, 0, sizeof(info_k)); 862 863 error = vfs_quotactl_stat(mp, &info_k); 864 if (error) { 865 return error; 866 } 867 868 return copyout(&info_k, info_u, sizeof(info_k)); 869 } 870 871 static int 872 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 873 struct quotaidtypestat *info_u) 874 { 875 struct quotaidtypestat info_k; 876 int error; 877 878 /* ensure any padding bytes are cleared */ 879 memset(&info_k, 0, sizeof(info_k)); 880 881 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 882 if (error) { 883 return error; 884 } 885 886 return copyout(&info_k, info_u, sizeof(info_k)); 887 } 888 889 static int 890 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 891 struct quotaobjtypestat *info_u) 892 { 893 struct quotaobjtypestat info_k; 894 int error; 895 896 /* ensure any padding bytes are cleared */ 897 memset(&info_k, 0, sizeof(info_k)); 898 899 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 900 if (error) { 901 return error; 902 } 903 904 return copyout(&info_k, info_u, 
sizeof(info_k)); 905 } 906 907 static int 908 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 909 struct quotaval *val_u) 910 { 911 struct quotakey key_k; 912 struct quotaval val_k; 913 int error; 914 915 /* ensure any padding bytes are cleared */ 916 memset(&val_k, 0, sizeof(val_k)); 917 918 error = copyin(key_u, &key_k, sizeof(key_k)); 919 if (error) { 920 return error; 921 } 922 923 error = vfs_quotactl_get(mp, &key_k, &val_k); 924 if (error) { 925 return error; 926 } 927 928 return copyout(&val_k, val_u, sizeof(val_k)); 929 } 930 931 static int 932 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 933 const struct quotaval *val_u) 934 { 935 struct quotakey key_k; 936 struct quotaval val_k; 937 int error; 938 939 error = copyin(key_u, &key_k, sizeof(key_k)); 940 if (error) { 941 return error; 942 } 943 944 error = copyin(val_u, &val_k, sizeof(val_k)); 945 if (error) { 946 return error; 947 } 948 949 return vfs_quotactl_put(mp, &key_k, &val_k); 950 } 951 952 static int 953 do_sys_quotactl_del(struct mount *mp, const struct quotakey *key_u) 954 { 955 struct quotakey key_k; 956 int error; 957 958 error = copyin(key_u, &key_k, sizeof(key_k)); 959 if (error) { 960 return error; 961 } 962 963 return vfs_quotactl_del(mp, &key_k); 964 } 965 966 static int 967 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 968 { 969 struct quotakcursor cursor_k; 970 int error; 971 972 /* ensure any padding bytes are cleared */ 973 memset(&cursor_k, 0, sizeof(cursor_k)); 974 975 error = vfs_quotactl_cursoropen(mp, &cursor_k); 976 if (error) { 977 return error; 978 } 979 980 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 981 } 982 983 static int 984 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 985 { 986 struct quotakcursor cursor_k; 987 int error; 988 989 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 990 if (error) { 991 return error; 992 } 993 994 return 
vfs_quotactl_cursorclose(mp, &cursor_k); 995 } 996 997 static int 998 do_sys_quotactl_cursorskipidtype(struct mount *mp, 999 struct quotakcursor *cursor_u, int idtype) 1000 { 1001 struct quotakcursor cursor_k; 1002 int error; 1003 1004 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1005 if (error) { 1006 return error; 1007 } 1008 1009 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 1010 if (error) { 1011 return error; 1012 } 1013 1014 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1015 } 1016 1017 static int 1018 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 1019 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 1020 unsigned *ret_u) 1021 { 1022 #define CGET_STACK_MAX 8 1023 struct quotakcursor cursor_k; 1024 struct quotakey stackkeys[CGET_STACK_MAX]; 1025 struct quotaval stackvals[CGET_STACK_MAX]; 1026 struct quotakey *keys_k; 1027 struct quotaval *vals_k; 1028 unsigned ret_k; 1029 int error; 1030 1031 if (maxnum > 128) { 1032 maxnum = 128; 1033 } 1034 1035 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1036 if (error) { 1037 return error; 1038 } 1039 1040 if (maxnum <= CGET_STACK_MAX) { 1041 keys_k = stackkeys; 1042 vals_k = stackvals; 1043 /* ensure any padding bytes are cleared */ 1044 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 1045 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 1046 } else { 1047 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 1048 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 1049 } 1050 1051 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 1052 &ret_k); 1053 if (error) { 1054 goto fail; 1055 } 1056 1057 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 1058 if (error) { 1059 goto fail; 1060 } 1061 1062 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 1063 if (error) { 1064 goto fail; 1065 } 1066 1067 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1068 if (error) { 1069 goto fail; 1070 } 
1071 1072 /* do last to maximize the chance of being able to recover a failure */ 1073 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1074 1075 fail: 1076 if (keys_k != stackkeys) { 1077 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 1078 } 1079 if (vals_k != stackvals) { 1080 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 1081 } 1082 return error; 1083 } 1084 1085 static int 1086 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 1087 int *ret_u) 1088 { 1089 struct quotakcursor cursor_k; 1090 int ret_k; 1091 int error; 1092 1093 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1094 if (error) { 1095 return error; 1096 } 1097 1098 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 1099 if (error) { 1100 return error; 1101 } 1102 1103 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 1104 if (error) { 1105 return error; 1106 } 1107 1108 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1109 } 1110 1111 static int 1112 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 1113 { 1114 struct quotakcursor cursor_k; 1115 int error; 1116 1117 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 1118 if (error) { 1119 return error; 1120 } 1121 1122 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 1123 if (error) { 1124 return error; 1125 } 1126 1127 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 1128 } 1129 1130 static int 1131 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 1132 { 1133 char *path_k; 1134 int error; 1135 1136 /* XXX this should probably be a struct pathbuf */ 1137 path_k = PNBUF_GET(); 1138 error = copyin(path_u, path_k, PATH_MAX); 1139 if (error) { 1140 PNBUF_PUT(path_k); 1141 return error; 1142 } 1143 1144 error = vfs_quotactl_quotaon(mp, idtype, path_k); 1145 1146 PNBUF_PUT(path_k); 1147 return error; 1148 } 1149 1150 static int 1151 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 1152 { 1153 1154 return vfs_quotactl_quotaoff(mp, 
idtype); 1155 } 1156 1157 int 1158 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 1159 { 1160 struct mount *mp; 1161 struct vnode *vp; 1162 int error; 1163 1164 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 1165 if (error != 0) 1166 return (error); 1167 mp = vp->v_mount; 1168 1169 switch (args->qc_op) { 1170 case QUOTACTL_STAT: 1171 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 1172 break; 1173 case QUOTACTL_IDTYPESTAT: 1174 error = do_sys_quotactl_idtypestat(mp, 1175 args->u.idtypestat.qc_idtype, 1176 args->u.idtypestat.qc_info); 1177 break; 1178 case QUOTACTL_OBJTYPESTAT: 1179 error = do_sys_quotactl_objtypestat(mp, 1180 args->u.objtypestat.qc_objtype, 1181 args->u.objtypestat.qc_info); 1182 break; 1183 case QUOTACTL_GET: 1184 error = do_sys_quotactl_get(mp, 1185 args->u.get.qc_key, 1186 args->u.get.qc_val); 1187 break; 1188 case QUOTACTL_PUT: 1189 error = do_sys_quotactl_put(mp, 1190 args->u.put.qc_key, 1191 args->u.put.qc_val); 1192 break; 1193 case QUOTACTL_DEL: 1194 error = do_sys_quotactl_del(mp, args->u.del.qc_key); 1195 break; 1196 case QUOTACTL_CURSOROPEN: 1197 error = do_sys_quotactl_cursoropen(mp, 1198 args->u.cursoropen.qc_cursor); 1199 break; 1200 case QUOTACTL_CURSORCLOSE: 1201 error = do_sys_quotactl_cursorclose(mp, 1202 args->u.cursorclose.qc_cursor); 1203 break; 1204 case QUOTACTL_CURSORSKIPIDTYPE: 1205 error = do_sys_quotactl_cursorskipidtype(mp, 1206 args->u.cursorskipidtype.qc_cursor, 1207 args->u.cursorskipidtype.qc_idtype); 1208 break; 1209 case QUOTACTL_CURSORGET: 1210 error = do_sys_quotactl_cursorget(mp, 1211 args->u.cursorget.qc_cursor, 1212 args->u.cursorget.qc_keys, 1213 args->u.cursorget.qc_vals, 1214 args->u.cursorget.qc_maxnum, 1215 args->u.cursorget.qc_ret); 1216 break; 1217 case QUOTACTL_CURSORATEND: 1218 error = do_sys_quotactl_cursoratend(mp, 1219 args->u.cursoratend.qc_cursor, 1220 args->u.cursoratend.qc_ret); 1221 break; 1222 case QUOTACTL_CURSORREWIND: 1223 error = 
do_sys_quotactl_cursorrewind(mp, 1224 args->u.cursorrewind.qc_cursor); 1225 break; 1226 case QUOTACTL_QUOTAON: 1227 error = do_sys_quotactl_quotaon(mp, 1228 args->u.quotaon.qc_idtype, 1229 args->u.quotaon.qc_quotafile); 1230 break; 1231 case QUOTACTL_QUOTAOFF: 1232 error = do_sys_quotactl_quotaoff(mp, 1233 args->u.quotaoff.qc_idtype); 1234 break; 1235 default: 1236 error = EINVAL; 1237 break; 1238 } 1239 1240 vrele(vp); 1241 return error; 1242 } 1243 1244 /* ARGSUSED */ 1245 int 1246 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1247 register_t *retval) 1248 { 1249 /* { 1250 syscallarg(const char *) path; 1251 syscallarg(struct quotactl_args *) args; 1252 } */ 1253 struct quotactl_args args; 1254 int error; 1255 1256 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1257 if (error) { 1258 return error; 1259 } 1260 1261 return do_sys_quotactl(SCARG(uap, path), &args); 1262 } 1263 1264 int 1265 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1266 int root) 1267 { 1268 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1269 bool chrooted; 1270 int error = 0; 1271 1272 KASSERT(l == curlwp); 1273 1274 /* 1275 * This is safe unlocked. cwdi_rdir never goes non-NULL -> NULL, 1276 * since it would imply chroots can be escaped. Just make sure this 1277 * routine is self-consistent. 1278 */ 1279 chrooted = (atomic_load_relaxed(&cwdi->cwdi_rdir) != NULL); 1280 1281 /* 1282 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1283 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1284 * overrides MNT_NOWAIT. 
1285 */ 1286 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1287 (flags != MNT_WAIT && flags != 0)) { 1288 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1289 } else { 1290 /* Get the filesystem stats now */ 1291 memset(sp, 0, sizeof(*sp)); 1292 if ((error = VFS_STATVFS(mp, sp)) != 0) 1293 return error; 1294 if (!chrooted) 1295 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1296 } 1297 1298 if (chrooted) { 1299 size_t len; 1300 char *bp; 1301 char c; 1302 char *path = PNBUF_GET(); 1303 1304 bp = path + MAXPATHLEN; 1305 *--bp = '\0'; 1306 rw_enter(&cwdi->cwdi_lock, RW_READER); 1307 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1308 MAXPATHLEN / 2, 0, l); 1309 rw_exit(&cwdi->cwdi_lock); 1310 if (error) { 1311 PNBUF_PUT(path); 1312 return error; 1313 } 1314 len = strlen(bp); 1315 if (len != 1) { 1316 /* 1317 * for mount points that are below our root, we can see 1318 * them, so we fix up the pathname and return them. The 1319 * rest we cannot see, so we don't allow viewing the 1320 * data. 1321 */ 1322 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1323 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1324 (void)strlcpy(sp->f_mntonname, 1325 c == '\0' ? "/" : &sp->f_mntonname[len], 1326 sizeof(sp->f_mntonname)); 1327 } else { 1328 if (root) 1329 (void)strlcpy(sp->f_mntonname, "/", 1330 sizeof(sp->f_mntonname)); 1331 else 1332 error = EPERM; 1333 } 1334 } 1335 PNBUF_PUT(path); 1336 } 1337 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1338 return error; 1339 } 1340 1341 /* 1342 * Get filesystem statistics by path. 
1343 */ 1344 int 1345 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1346 { 1347 struct mount *mp; 1348 int error; 1349 struct vnode *vp; 1350 1351 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1352 if (error != 0) 1353 return error; 1354 mp = vp->v_mount; 1355 error = dostatvfs(mp, sb, l, flags, 1); 1356 vrele(vp); 1357 return error; 1358 } 1359 1360 /* ARGSUSED */ 1361 int 1362 sys___statvfs190(struct lwp *l, const struct sys___statvfs190_args *uap, 1363 register_t *retval) 1364 { 1365 /* { 1366 syscallarg(const char *) path; 1367 syscallarg(struct statvfs *) buf; 1368 syscallarg(int) flags; 1369 } */ 1370 struct statvfs *sb; 1371 int error; 1372 1373 sb = STATVFSBUF_GET(); 1374 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1375 if (error == 0) 1376 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1377 STATVFSBUF_PUT(sb); 1378 return error; 1379 } 1380 1381 /* 1382 * Get filesystem statistics by fd. 1383 */ 1384 int 1385 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1386 { 1387 file_t *fp; 1388 struct mount *mp; 1389 int error; 1390 1391 /* fd_getvnode() will use the descriptor for us */ 1392 if ((error = fd_getvnode(fd, &fp)) != 0) 1393 return (error); 1394 mp = fp->f_vnode->v_mount; 1395 error = dostatvfs(mp, sb, curlwp, flags, 1); 1396 fd_putfile(fd); 1397 return error; 1398 } 1399 1400 /* ARGSUSED */ 1401 int 1402 sys___fstatvfs190(struct lwp *l, const struct sys___fstatvfs190_args *uap, 1403 register_t *retval) 1404 { 1405 /* { 1406 syscallarg(int) fd; 1407 syscallarg(struct statvfs *) buf; 1408 syscallarg(int) flags; 1409 } */ 1410 struct statvfs *sb; 1411 int error; 1412 1413 sb = STATVFSBUF_GET(); 1414 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1415 if (error == 0) 1416 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1417 STATVFSBUF_PUT(sb); 1418 return error; 1419 } 1420 1421 /* 1422 * Get statistics on all filesystems. 
1423 */ 1424 int 1425 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1426 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1427 register_t *retval) 1428 { 1429 int root = 0; 1430 mount_iterator_t *iter; 1431 struct proc *p = l->l_proc; 1432 struct mount *mp; 1433 struct statvfs *sb; 1434 size_t count, maxcount; 1435 int error = 0; 1436 1437 sb = STATVFSBUF_GET(); 1438 maxcount = bufsize / entry_sz; 1439 count = 0; 1440 mountlist_iterator_init(&iter); 1441 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1442 if (sfsp && count < maxcount) { 1443 error = dostatvfs(mp, sb, l, flags, 0); 1444 if (error) { 1445 error = 0; 1446 continue; 1447 } 1448 error = copyfn(sb, sfsp, entry_sz); 1449 if (error) 1450 goto out; 1451 sfsp = (char *)sfsp + entry_sz; 1452 root |= strcmp(sb->f_mntonname, "/") == 0; 1453 } 1454 count++; 1455 } 1456 1457 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1458 /* 1459 * fake a root entry 1460 */ 1461 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1462 sb, l, flags, 1); 1463 if (error != 0) 1464 goto out; 1465 if (sfsp) { 1466 error = copyfn(sb, sfsp, entry_sz); 1467 if (error != 0) 1468 goto out; 1469 } 1470 count++; 1471 } 1472 if (sfsp && count > maxcount) 1473 *retval = maxcount; 1474 else 1475 *retval = count; 1476 out: 1477 mountlist_iterator_destroy(iter); 1478 STATVFSBUF_PUT(sb); 1479 return error; 1480 } 1481 1482 int 1483 sys___getvfsstat90(struct lwp *l, const struct sys___getvfsstat90_args *uap, 1484 register_t *retval) 1485 { 1486 /* { 1487 syscallarg(struct statvfs *) buf; 1488 syscallarg(size_t) bufsize; 1489 syscallarg(int) flags; 1490 } */ 1491 1492 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1493 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1494 } 1495 1496 /* 1497 * Change current working directory to a given file descriptor. 
1498 */ 1499 int 1500 do_sys_fchdir(struct lwp *l, int fd, register_t *retval) 1501 { 1502 struct proc *p = l->l_proc; 1503 struct cwdinfo *cwdi; 1504 struct vnode *vp, *tdp; 1505 struct mount *mp; 1506 file_t *fp; 1507 int error; 1508 1509 /* fd_getvnode() will use the descriptor for us */ 1510 if ((error = fd_getvnode(fd, &fp)) != 0) 1511 return error; 1512 vp = fp->f_vnode; 1513 1514 vref(vp); 1515 vn_lock(vp, LK_SHARED | LK_RETRY); 1516 if (vp->v_type != VDIR) 1517 error = ENOTDIR; 1518 else 1519 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1520 if (error) { 1521 vput(vp); 1522 goto out; 1523 } 1524 while ((mp = vp->v_mountedhere) != NULL) { 1525 error = vfs_busy(mp); 1526 vput(vp); 1527 if (error != 0) 1528 goto out; 1529 error = VFS_ROOT(mp, LK_SHARED, &tdp); 1530 vfs_unbusy(mp); 1531 if (error) 1532 goto out; 1533 vp = tdp; 1534 } 1535 VOP_UNLOCK(vp); 1536 1537 /* 1538 * Disallow changing to a directory not under the process's 1539 * current root directory (if there is one). 1540 */ 1541 cwdi = p->p_cwdi; 1542 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1543 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1544 vrele(vp); 1545 error = EPERM; /* operation not permitted */ 1546 } else { 1547 vrele(cwdi->cwdi_cdir); 1548 cwdi->cwdi_cdir = vp; 1549 } 1550 rw_exit(&cwdi->cwdi_lock); 1551 1552 out: 1553 fd_putfile(fd); 1554 return error; 1555 } 1556 1557 /* 1558 * Change current working directory to a given file descriptor. 1559 */ 1560 /* ARGSUSED */ 1561 int 1562 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, 1563 register_t *retval) 1564 { 1565 /* { 1566 syscallarg(int) fd; 1567 } */ 1568 1569 return do_sys_fchdir(l, SCARG(uap, fd), retval); 1570 } 1571 1572 /* 1573 * Change this process's notion of the root directory to a given file 1574 * descriptor. 
1575 */ 1576 int 1577 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, 1578 register_t *retval) 1579 { 1580 struct vnode *vp; 1581 file_t *fp; 1582 int error, fd = SCARG(uap, fd); 1583 1584 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1585 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1586 return error; 1587 /* fd_getvnode() will use the descriptor for us */ 1588 if ((error = fd_getvnode(fd, &fp)) != 0) 1589 return error; 1590 vp = fp->f_vnode; 1591 vn_lock(vp, LK_SHARED | LK_RETRY); 1592 if (vp->v_type != VDIR) 1593 error = ENOTDIR; 1594 else 1595 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1596 VOP_UNLOCK(vp); 1597 if (error) 1598 goto out; 1599 vref(vp); 1600 change_root(vp); 1601 1602 out: 1603 fd_putfile(fd); 1604 return (error); 1605 } 1606 1607 /* 1608 * Change current working directory (``.''). 1609 */ 1610 int 1611 do_sys_chdir(struct lwp *l, const char *path, enum uio_seg seg, 1612 register_t *retval) 1613 { 1614 struct proc *p = l->l_proc; 1615 struct cwdinfo * cwdi; 1616 int error; 1617 struct vnode *vp; 1618 1619 if ((error = chdir_lookup(path, seg, &vp, l)) != 0) 1620 return error; 1621 cwdi = p->p_cwdi; 1622 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1623 vrele(cwdi->cwdi_cdir); 1624 cwdi->cwdi_cdir = vp; 1625 rw_exit(&cwdi->cwdi_lock); 1626 return 0; 1627 } 1628 1629 /* 1630 * Change current working directory (``.''). 1631 */ 1632 /* ARGSUSED */ 1633 int 1634 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1635 { 1636 /* { 1637 syscallarg(const char *) path; 1638 } */ 1639 1640 return do_sys_chdir(l, SCARG(uap, path), UIO_USERSPACE, retval); 1641 } 1642 1643 /* 1644 * Change notion of root (``/'') directory. 
1645 */ 1646 /* ARGSUSED */ 1647 int 1648 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, 1649 register_t *retval) 1650 { 1651 /* { 1652 syscallarg(const char *) path; 1653 } */ 1654 int error; 1655 struct vnode *vp; 1656 1657 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1658 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1659 return (error); 1660 1661 error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); 1662 if (error == 0) 1663 change_root(vp); 1664 return error; 1665 } 1666 1667 /* 1668 * Common routine for chroot and fchroot. 1669 * NB: callers need to properly authorize the change root operation. 1670 */ 1671 void 1672 change_root(struct vnode *vp) 1673 { 1674 kauth_cred_t ncred; 1675 struct lwp *l = curlwp; 1676 struct proc *p = l->l_proc; 1677 struct cwdinfo *cwdi = p->p_cwdi; 1678 1679 ncred = kauth_cred_alloc(); 1680 1681 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1682 if (cwdi->cwdi_rdir != NULL) 1683 vrele(cwdi->cwdi_rdir); 1684 cwdi->cwdi_rdir = vp; 1685 1686 /* 1687 * Prevent escaping from chroot by putting the root under 1688 * the working directory. Silently chdir to / if we aren't 1689 * already there. 1690 */ 1691 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1692 /* 1693 * XXX would be more failsafe to change directory to a 1694 * deadfs node here instead 1695 */ 1696 vrele(cwdi->cwdi_cdir); 1697 vref(vp); 1698 cwdi->cwdi_cdir = vp; 1699 } 1700 rw_exit(&cwdi->cwdi_lock); 1701 1702 /* Get a write lock on the process credential. */ 1703 proc_crmod_enter(); 1704 1705 kauth_cred_clone(p->p_cred, ncred); 1706 kauth_proc_chroot(ncred, p->p_cwdi); 1707 1708 /* Broadcast our credentials to the process and other LWPs. */ 1709 proc_crmod_leave(ncred, p->p_cred, true); 1710 } 1711 1712 /* 1713 * Common routine for chroot and chdir. 
1714 * XXX "where" should be enum uio_seg 1715 */ 1716 int 1717 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1718 { 1719 struct pathbuf *pb; 1720 struct nameidata nd; 1721 int error; 1722 1723 error = pathbuf_maybe_copyin(path, where, &pb); 1724 if (error) { 1725 return error; 1726 } 1727 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); 1728 if ((error = namei(&nd)) != 0) { 1729 pathbuf_destroy(pb); 1730 return error; 1731 } 1732 *vpp = nd.ni_vp; 1733 pathbuf_destroy(pb); 1734 1735 if ((*vpp)->v_type != VDIR) 1736 error = ENOTDIR; 1737 else 1738 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1739 1740 if (error) 1741 vput(*vpp); 1742 else 1743 VOP_UNLOCK(*vpp); 1744 return (error); 1745 } 1746 1747 /* 1748 * Internals of sys_open - path has already been converted into a pathbuf 1749 * (so we can easily reuse this function from other parts of the kernel, 1750 * like posix_spawn post-processing). 1751 */ 1752 int 1753 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1754 int open_mode, int *fd) 1755 { 1756 struct proc *p = l->l_proc; 1757 struct cwdinfo *cwdi = p->p_cwdi; 1758 file_t *fp; 1759 struct vnode *vp; 1760 int dupfd; 1761 bool dupfd_move; 1762 int flags, cmode; 1763 int indx, error; 1764 1765 if (open_flags & O_SEARCH) { 1766 open_flags &= ~(int)O_SEARCH; 1767 } 1768 1769 /* 1770 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1771 * may be specified. 1772 */ 1773 if ((open_flags & O_EXEC) && (open_flags & O_ACCMODE)) 1774 return EINVAL; 1775 1776 flags = FFLAGS(open_flags); 1777 if ((flags & (FREAD | FWRITE)) == 0) 1778 return EINVAL; 1779 1780 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1781 return error; 1782 } 1783 1784 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1785 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1786 1787 error = vn_open(dvp, pb, TRYEMULROOT, flags, cmode, 1788 &vp, &dupfd_move, &dupfd); 1789 if (error != 0) { 1790 fd_abort(p, fp, indx); 1791 return error; 1792 } 1793 1794 if (vp == NULL) { 1795 fd_abort(p, fp, indx); 1796 error = fd_dupopen(dupfd, dupfd_move, flags, &indx); 1797 if (error) 1798 return error; 1799 *fd = indx; 1800 } else { 1801 error = open_setfp(l, fp, vp, indx, flags); 1802 if (error) 1803 return error; 1804 VOP_UNLOCK(vp); 1805 *fd = indx; 1806 fd_affix(p, fp, indx); 1807 } 1808 1809 return 0; 1810 } 1811 1812 int 1813 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1814 { 1815 struct pathbuf *pb; 1816 int error, oflags; 1817 1818 oflags = FFLAGS(open_flags); 1819 if ((oflags & (FREAD | FWRITE)) == 0) 1820 return EINVAL; 1821 1822 pb = pathbuf_create(path); 1823 if (pb == NULL) 1824 return ENOMEM; 1825 1826 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1827 pathbuf_destroy(pb); 1828 1829 return error; 1830 } 1831 1832 static int 1833 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1834 int mode, int *fd) 1835 { 1836 file_t *dfp = NULL; 1837 struct vnode *dvp = NULL; 1838 struct pathbuf *pb; 1839 const char *pathstring = NULL; 1840 int error; 1841 1842 if (path == NULL) { 1843 MODULE_HOOK_CALL(vfs_openat_10_hook, (&pb), enosys(), error); 1844 if (error == ENOSYS) 1845 goto no_compat; 1846 if (error) 1847 return error; 1848 } else { 1849 no_compat: 1850 error = pathbuf_copyin(path, &pb); 1851 if (error) 1852 return error; 1853 } 1854 1855 pathstring = pathbuf_stringcopy_get(pb); 1856 1857 /* 1858 * fdat is ignored if: 1859 * 1) if fdat is AT_FDCWD, which means use current directory as base. 1860 * 2) if path is absolute, then fdat is useless. 
1861 */ 1862 if (fdat != AT_FDCWD && pathstring[0] != '/') { 1863 /* fd_getvnode() will use the descriptor for us */ 1864 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1865 goto out; 1866 1867 dvp = dfp->f_vnode; 1868 } 1869 1870 error = do_open(l, dvp, pb, flags, mode, fd); 1871 1872 if (dfp != NULL) 1873 fd_putfile(fdat); 1874 out: 1875 pathbuf_stringcopy_put(pb, pathstring); 1876 pathbuf_destroy(pb); 1877 return error; 1878 } 1879 1880 int 1881 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1882 { 1883 /* { 1884 syscallarg(const char *) path; 1885 syscallarg(int) flags; 1886 syscallarg(int) mode; 1887 } */ 1888 int error; 1889 int fd; 1890 1891 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1892 SCARG(uap, flags), SCARG(uap, mode), &fd); 1893 1894 if (error == 0) 1895 *retval = fd; 1896 1897 return error; 1898 } 1899 1900 int 1901 sys_openat(struct lwp *l, const struct sys_openat_args *uap, 1902 register_t *retval) 1903 { 1904 /* { 1905 syscallarg(int) fd; 1906 syscallarg(const char *) path; 1907 syscallarg(int) oflags; 1908 syscallarg(int) mode; 1909 } */ 1910 int error; 1911 int fd; 1912 1913 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1914 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1915 1916 if (error == 0) 1917 *retval = fd; 1918 1919 return error; 1920 } 1921 1922 static void 1923 vfs__fhfree(fhandle_t *fhp) 1924 { 1925 size_t fhsize; 1926 1927 fhsize = FHANDLE_SIZE(fhp); 1928 kmem_free(fhp, fhsize); 1929 } 1930 1931 /* 1932 * vfs_composefh: compose a filehandle. 
1933 */ 1934 1935 int 1936 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1937 { 1938 struct mount *mp; 1939 struct fid *fidp; 1940 int error; 1941 size_t needfhsize; 1942 size_t fidsize; 1943 1944 mp = vp->v_mount; 1945 fidp = NULL; 1946 if (*fh_size < FHANDLE_SIZE_MIN) { 1947 fidsize = 0; 1948 } else { 1949 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1950 if (fhp != NULL) { 1951 memset(fhp, 0, *fh_size); 1952 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1953 fidp = &fhp->fh_fid; 1954 } 1955 } 1956 error = VFS_VPTOFH(vp, fidp, &fidsize); 1957 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1958 if (error == 0 && *fh_size < needfhsize) { 1959 error = E2BIG; 1960 } 1961 *fh_size = needfhsize; 1962 return error; 1963 } 1964 1965 int 1966 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1967 { 1968 struct mount *mp; 1969 fhandle_t *fhp; 1970 size_t fhsize; 1971 size_t fidsize; 1972 int error; 1973 1974 mp = vp->v_mount; 1975 fidsize = 0; 1976 error = VFS_VPTOFH(vp, NULL, &fidsize); 1977 KASSERT(error != 0); 1978 if (error != E2BIG) { 1979 goto out; 1980 } 1981 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1982 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1983 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1984 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1985 if (error == 0) { 1986 KASSERT(FHANDLE_SIZE(fhp) == fhsize); 1987 KASSERT(FHANDLE_FILEID(fhp)->fid_len == fidsize); 1988 *fhpp = fhp; 1989 } else { 1990 kmem_free(fhp, fhsize); 1991 } 1992 out: 1993 return error; 1994 } 1995 1996 void 1997 vfs_composefh_free(fhandle_t *fhp) 1998 { 1999 2000 vfs__fhfree(fhp); 2001 } 2002 2003 /* 2004 * vfs_fhtovp: lookup a vnode by a filehandle. 
2005 */ 2006 2007 int 2008 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 2009 { 2010 struct mount *mp; 2011 int error; 2012 2013 *vpp = NULL; 2014 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 2015 if (mp == NULL) { 2016 error = ESTALE; 2017 goto out; 2018 } 2019 if (mp->mnt_op->vfs_fhtovp == NULL) { 2020 error = EOPNOTSUPP; 2021 goto out; 2022 } 2023 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), LK_EXCLUSIVE, vpp); 2024 out: 2025 return error; 2026 } 2027 2028 /* 2029 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 2030 * the needed size. 2031 */ 2032 2033 int 2034 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 2035 { 2036 fhandle_t *fhp; 2037 int error; 2038 2039 if (fhsize > FHANDLE_SIZE_MAX) { 2040 return EINVAL; 2041 } 2042 if (fhsize < FHANDLE_SIZE_MIN) { 2043 return EINVAL; 2044 } 2045 again: 2046 fhp = kmem_alloc(fhsize, KM_SLEEP); 2047 error = copyin(ufhp, fhp, fhsize); 2048 if (error == 0) { 2049 /* XXX this check shouldn't be here */ 2050 if (FHANDLE_SIZE(fhp) == fhsize) { 2051 *fhpp = fhp; 2052 return 0; 2053 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 2054 /* 2055 * a kludge for nfsv2 padded handles. 2056 */ 2057 size_t sz; 2058 2059 sz = FHANDLE_SIZE(fhp); 2060 kmem_free(fhp, fhsize); 2061 fhsize = sz; 2062 goto again; 2063 } else { 2064 /* 2065 * userland told us wrong size. 
2066 */ 2067 error = EINVAL; 2068 } 2069 } 2070 kmem_free(fhp, fhsize); 2071 return error; 2072 } 2073 2074 void 2075 vfs_copyinfh_free(fhandle_t *fhp) 2076 { 2077 2078 vfs__fhfree(fhp); 2079 } 2080 2081 /* 2082 * Get file handle system call 2083 */ 2084 int 2085 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, 2086 register_t *retval) 2087 { 2088 /* { 2089 syscallarg(char *) fname; 2090 syscallarg(fhandle_t *) fhp; 2091 syscallarg(size_t *) fh_size; 2092 } */ 2093 struct vnode *vp; 2094 fhandle_t *fh; 2095 int error; 2096 struct pathbuf *pb; 2097 struct nameidata nd; 2098 size_t sz; 2099 size_t usz; 2100 2101 /* 2102 * Must be super user 2103 */ 2104 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2105 0, NULL, NULL, NULL); 2106 if (error) 2107 return (error); 2108 2109 error = pathbuf_copyin(SCARG(uap, fname), &pb); 2110 if (error) { 2111 return error; 2112 } 2113 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2114 error = namei(&nd); 2115 if (error) { 2116 pathbuf_destroy(pb); 2117 return error; 2118 } 2119 vp = nd.ni_vp; 2120 pathbuf_destroy(pb); 2121 2122 error = vfs_composefh_alloc(vp, &fh); 2123 vput(vp); 2124 if (error != 0) { 2125 return error; 2126 } 2127 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 2128 if (error != 0) { 2129 goto out; 2130 } 2131 sz = FHANDLE_SIZE(fh); 2132 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 2133 if (error != 0) { 2134 goto out; 2135 } 2136 if (usz >= sz) { 2137 error = copyout(fh, SCARG(uap, fhp), sz); 2138 } else { 2139 error = E2BIG; 2140 } 2141 out: 2142 vfs_composefh_free(fh); 2143 return (error); 2144 } 2145 2146 /* 2147 * Open a file given a file handle. 2148 * 2149 * Check permissions, allocate an open file structure, 2150 * and call the device open routine if any. 
2151 */ 2152 2153 int 2154 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 2155 register_t *retval) 2156 { 2157 file_t *fp; 2158 struct vnode *vp = NULL; 2159 kauth_cred_t cred = l->l_cred; 2160 file_t *nfp; 2161 int indx, error; 2162 struct vattr va; 2163 fhandle_t *fh; 2164 int flags; 2165 proc_t *p; 2166 2167 p = curproc; 2168 2169 /* 2170 * Must be super user 2171 */ 2172 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2173 0, NULL, NULL, NULL))) 2174 return (error); 2175 2176 if (oflags & O_SEARCH) { 2177 oflags &= ~(int)O_SEARCH; 2178 } 2179 2180 flags = FFLAGS(oflags); 2181 if ((flags & (FREAD | FWRITE)) == 0) 2182 return (EINVAL); 2183 if ((flags & O_CREAT)) 2184 return (EINVAL); 2185 if ((error = fd_allocfile(&nfp, &indx)) != 0) 2186 return (error); 2187 fp = nfp; 2188 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2189 if (error != 0) { 2190 goto bad; 2191 } 2192 error = vfs_fhtovp(fh, &vp); 2193 vfs_copyinfh_free(fh); 2194 if (error != 0) { 2195 goto bad; 2196 } 2197 2198 /* Now do an effective vn_open */ 2199 2200 if (vp->v_type == VSOCK) { 2201 error = EOPNOTSUPP; 2202 goto bad; 2203 } 2204 error = vn_openchk(vp, cred, flags); 2205 if (error != 0) 2206 goto bad; 2207 if (flags & O_TRUNC) { 2208 VOP_UNLOCK(vp); /* XXX */ 2209 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2210 vattr_null(&va); 2211 va.va_size = 0; 2212 error = VOP_SETATTR(vp, &va, cred); 2213 if (error) 2214 goto bad; 2215 } 2216 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2217 goto bad; 2218 if (flags & FWRITE) { 2219 mutex_enter(vp->v_interlock); 2220 vp->v_writecount++; 2221 mutex_exit(vp->v_interlock); 2222 } 2223 2224 /* done with modified vn_open, now finish what sys_open does. 
*/ 2225 if ((error = open_setfp(l, fp, vp, indx, flags))) 2226 return error; 2227 2228 VOP_UNLOCK(vp); 2229 *retval = indx; 2230 fd_affix(p, fp, indx); 2231 return (0); 2232 2233 bad: 2234 fd_abort(p, fp, indx); 2235 if (vp != NULL) 2236 vput(vp); 2237 if (error == EDUPFD || error == EMOVEFD) { 2238 /* XXX should probably close curlwp->l_dupfd */ 2239 error = EOPNOTSUPP; 2240 } 2241 return (error); 2242 } 2243 2244 int 2245 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, 2246 register_t *retval) 2247 { 2248 /* { 2249 syscallarg(const void *) fhp; 2250 syscallarg(size_t) fh_size; 2251 syscallarg(int) flags; 2252 } */ 2253 2254 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2255 SCARG(uap, flags), retval); 2256 } 2257 2258 int 2259 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2260 { 2261 int error; 2262 fhandle_t *fh; 2263 struct vnode *vp; 2264 2265 /* 2266 * Must be super user 2267 */ 2268 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2269 0, NULL, NULL, NULL))) 2270 return error; 2271 2272 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2273 if (error != 0) 2274 return error; 2275 2276 error = vfs_fhtovp(fh, &vp); 2277 vfs_copyinfh_free(fh); 2278 if (error != 0) 2279 return error; 2280 2281 error = vn_stat(vp, sb); 2282 vput(vp); 2283 return error; 2284 } 2285 2286 /* ARGSUSED */ 2287 int 2288 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, 2289 register_t *retval) 2290 { 2291 /* { 2292 syscallarg(const void *) fhp; 2293 syscallarg(size_t) fh_size; 2294 syscallarg(struct stat *) sb; 2295 } */ 2296 struct stat sb; 2297 int error; 2298 2299 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2300 if (error) 2301 return error; 2302 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2303 } 2304 2305 int 2306 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, 2307 struct statvfs *sb, int flags) 2308 { 2309 fhandle_t *fh; 2310 struct mount *mp; 
2311 struct vnode *vp; 2312 int error; 2313 2314 /* 2315 * Must be super user 2316 */ 2317 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2318 0, NULL, NULL, NULL))) 2319 return error; 2320 2321 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2322 if (error != 0) 2323 return error; 2324 2325 error = vfs_fhtovp(fh, &vp); 2326 vfs_copyinfh_free(fh); 2327 if (error != 0) 2328 return error; 2329 2330 mp = vp->v_mount; 2331 error = dostatvfs(mp, sb, l, flags, 1); 2332 vput(vp); 2333 return error; 2334 } 2335 2336 /* ARGSUSED */ 2337 int 2338 sys___fhstatvfs190(struct lwp *l, const struct sys___fhstatvfs190_args *uap, 2339 register_t *retval) 2340 { 2341 /* { 2342 syscallarg(const void *) fhp; 2343 syscallarg(size_t) fh_size; 2344 syscallarg(struct statvfs *) buf; 2345 syscallarg(int) flags; 2346 } */ 2347 struct statvfs *sb = STATVFSBUF_GET(); 2348 int error; 2349 2350 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2351 SCARG(uap, flags)); 2352 if (error == 0) 2353 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2354 STATVFSBUF_PUT(sb); 2355 return error; 2356 } 2357 2358 int 2359 do_posix_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2360 dev_t dev) 2361 { 2362 2363 /* 2364 * The POSIX mknod(2) call is an alias for mkfifo(2) for S_IFIFO 2365 * in mode and dev=0. 2366 * 2367 * In all the other cases it's implementation defined behavior. 2368 */ 2369 2370 if ((mode & S_IFIFO) && dev == 0) 2371 return do_sys_mkfifoat(l, fdat, pathname, mode); 2372 else 2373 return do_sys_mknodat(l, fdat, pathname, mode, dev, 2374 UIO_USERSPACE); 2375 } 2376 2377 /* 2378 * Create a special file. 
2379 */ 2380 /* ARGSUSED */ 2381 int 2382 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2383 register_t *retval) 2384 { 2385 /* { 2386 syscallarg(const char *) path; 2387 syscallarg(mode_t) mode; 2388 syscallarg(dev_t) dev; 2389 } */ 2390 return do_posix_mknodat(l, AT_FDCWD, SCARG(uap, path), 2391 SCARG(uap, mode), SCARG(uap, dev)); 2392 } 2393 2394 int 2395 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2396 register_t *retval) 2397 { 2398 /* { 2399 syscallarg(int) fd; 2400 syscallarg(const char *) path; 2401 syscallarg(mode_t) mode; 2402 syscallarg(int) pad; 2403 syscallarg(dev_t) dev; 2404 } */ 2405 2406 return do_posix_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2407 SCARG(uap, mode), SCARG(uap, dev)); 2408 } 2409 2410 int 2411 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2412 enum uio_seg seg) 2413 { 2414 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, seg); 2415 } 2416 2417 int 2418 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2419 dev_t dev, enum uio_seg seg) 2420 { 2421 struct proc *p = l->l_proc; 2422 struct vnode *vp; 2423 struct vattr vattr; 2424 int error, optype; 2425 struct pathbuf *pb; 2426 struct nameidata nd; 2427 const char *pathstring; 2428 2429 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2430 0, NULL, NULL, NULL)) != 0) 2431 return (error); 2432 2433 optype = VOP_MKNOD_DESCOFFSET; 2434 2435 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2436 if (error) { 2437 return error; 2438 } 2439 pathstring = pathbuf_stringcopy_get(pb); 2440 if (pathstring == NULL) { 2441 pathbuf_destroy(pb); 2442 return ENOMEM; 2443 } 2444 2445 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2446 2447 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2448 goto out; 2449 vp = nd.ni_vp; 2450 2451 if (vp != NULL) 2452 error = EEXIST; 2453 else { 2454 vattr_null(&vattr); 2455 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2456 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2457 vattr.va_rdev = dev; 2458 2459 switch (mode & S_IFMT) { 2460 case S_IFMT: /* used by badsect to flag bad sectors */ 2461 vattr.va_type = VBAD; 2462 break; 2463 case S_IFCHR: 2464 vattr.va_type = VCHR; 2465 break; 2466 case S_IFBLK: 2467 vattr.va_type = VBLK; 2468 break; 2469 case S_IFWHT: 2470 optype = VOP_WHITEOUT_DESCOFFSET; 2471 break; 2472 case S_IFREG: 2473 #if NVERIEXEC > 0 2474 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2475 O_CREAT); 2476 #endif /* NVERIEXEC > 0 */ 2477 vattr.va_type = VREG; 2478 vattr.va_rdev = VNOVAL; 2479 optype = VOP_CREATE_DESCOFFSET; 2480 break; 2481 default: 2482 error = EINVAL; 2483 break; 2484 } 2485 2486 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET && 2487 vattr.va_rdev == VNOVAL) 2488 error = EINVAL; 2489 } 2490 2491 if (!error) { 2492 switch (optype) { 2493 case VOP_WHITEOUT_DESCOFFSET: 2494 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2495 if (error) 2496 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2497 vput(nd.ni_dvp); 2498 break; 2499 2500 case VOP_MKNOD_DESCOFFSET: 2501 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2502 &nd.ni_cnd, &vattr); 2503 if (error == 0) 2504 vrele(nd.ni_vp); 2505 vput(nd.ni_dvp); 2506 break; 2507 2508 case VOP_CREATE_DESCOFFSET: 2509 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2510 &nd.ni_cnd, &vattr); 2511 if (error == 0) 2512 vrele(nd.ni_vp); 2513 vput(nd.ni_dvp); 2514 break; 2515 } 2516 } else { 2517 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2518 if (nd.ni_dvp == vp) 2519 vrele(nd.ni_dvp); 2520 else 2521 vput(nd.ni_dvp); 2522 if (vp) 2523 vrele(vp); 2524 } 2525 out: 2526 pathbuf_stringcopy_put(pb, pathstring); 2527 pathbuf_destroy(pb); 2528 return (error); 2529 } 2530 2531 /* 2532 * Create a named pipe. 
2533 */ 2534 /* ARGSUSED */ 2535 int 2536 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, 2537 register_t *retval) 2538 { 2539 /* { 2540 syscallarg(const char *) path; 2541 syscallarg(int) mode; 2542 } */ 2543 2544 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), 2545 SCARG(uap, mode)); 2546 } 2547 2548 int 2549 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2550 register_t *retval) 2551 { 2552 /* { 2553 syscallarg(int) fd; 2554 syscallarg(const char *) path; 2555 syscallarg(int) mode; 2556 } */ 2557 2558 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2559 SCARG(uap, mode)); 2560 } 2561 2562 static int 2563 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2564 { 2565 struct proc *p = l->l_proc; 2566 struct vattr vattr; 2567 int error; 2568 struct pathbuf *pb; 2569 struct nameidata nd; 2570 2571 error = pathbuf_copyin(path, &pb); 2572 if (error) { 2573 return error; 2574 } 2575 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2576 2577 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2578 pathbuf_destroy(pb); 2579 return error; 2580 } 2581 if (nd.ni_vp != NULL) { 2582 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2583 if (nd.ni_dvp == nd.ni_vp) 2584 vrele(nd.ni_dvp); 2585 else 2586 vput(nd.ni_dvp); 2587 vrele(nd.ni_vp); 2588 pathbuf_destroy(pb); 2589 return (EEXIST); 2590 } 2591 vattr_null(&vattr); 2592 vattr.va_type = VFIFO; 2593 /* We will read cwdi->cwdi_cmask unlocked. */ 2594 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2595 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2596 if (error == 0) 2597 vrele(nd.ni_vp); 2598 vput(nd.ni_dvp); 2599 pathbuf_destroy(pb); 2600 return (error); 2601 } 2602 2603 /* 2604 * Make a hard file link. 
2605 */ 2606 /* ARGSUSED */ 2607 int 2608 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2609 const char *link, int follow, register_t *retval) 2610 { 2611 struct vnode *vp; 2612 struct pathbuf *linkpb; 2613 struct nameidata nd; 2614 namei_simple_flags_t ns_flags; 2615 int error; 2616 2617 if (follow & AT_SYMLINK_FOLLOW) 2618 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2619 else 2620 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2621 2622 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2623 if (error != 0) 2624 return (error); 2625 error = pathbuf_copyin(link, &linkpb); 2626 if (error) { 2627 goto out1; 2628 } 2629 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2630 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2631 goto out2; 2632 if (nd.ni_vp) { 2633 error = EEXIST; 2634 goto abortop; 2635 } 2636 /* Prevent hard links on directories. */ 2637 if (vp->v_type == VDIR) { 2638 error = EPERM; 2639 goto abortop; 2640 } 2641 /* Prevent cross-mount operation. */ 2642 if (nd.ni_dvp->v_mount != vp->v_mount) { 2643 error = EXDEV; 2644 goto abortop; 2645 } 2646 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2647 VOP_UNLOCK(nd.ni_dvp); 2648 vrele(nd.ni_dvp); 2649 out2: 2650 pathbuf_destroy(linkpb); 2651 out1: 2652 vrele(vp); 2653 return (error); 2654 abortop: 2655 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2656 if (nd.ni_dvp == nd.ni_vp) 2657 vrele(nd.ni_dvp); 2658 else 2659 vput(nd.ni_dvp); 2660 if (nd.ni_vp != NULL) 2661 vrele(nd.ni_vp); 2662 goto out2; 2663 } 2664 2665 int 2666 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2667 { 2668 /* { 2669 syscallarg(const char *) path; 2670 syscallarg(const char *) link; 2671 } */ 2672 const char *path = SCARG(uap, path); 2673 const char *link = SCARG(uap, link); 2674 2675 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2676 AT_SYMLINK_FOLLOW, retval); 2677 } 2678 2679 int 2680 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2681 register_t *retval) 2682 { 2683 
/* { 2684 syscallarg(int) fd1; 2685 syscallarg(const char *) name1; 2686 syscallarg(int) fd2; 2687 syscallarg(const char *) name2; 2688 syscallarg(int) flags; 2689 } */ 2690 int fd1 = SCARG(uap, fd1); 2691 const char *name1 = SCARG(uap, name1); 2692 int fd2 = SCARG(uap, fd2); 2693 const char *name2 = SCARG(uap, name2); 2694 int follow; 2695 2696 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2697 2698 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2699 } 2700 2701 int 2702 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2703 { 2704 2705 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2706 } 2707 2708 static int 2709 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2710 const char *link, enum uio_seg seg) 2711 { 2712 struct proc *p = curproc; 2713 struct vattr vattr; 2714 char *path; 2715 int error; 2716 size_t len; 2717 struct pathbuf *linkpb; 2718 struct nameidata nd; 2719 2720 KASSERT(l != NULL || fdat == AT_FDCWD); 2721 2722 path = PNBUF_GET(); 2723 if (seg == UIO_USERSPACE) { 2724 if ((error = copyinstr(patharg, path, MAXPATHLEN, &len)) != 0) 2725 goto out1; 2726 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2727 goto out1; 2728 } else { 2729 len = strlen(patharg) + 1; 2730 KASSERT(len <= MAXPATHLEN); 2731 memcpy(path, patharg, len); 2732 linkpb = pathbuf_create(link); 2733 if (linkpb == NULL) { 2734 error = ENOMEM; 2735 goto out1; 2736 } 2737 } 2738 ktrkuser("symlink-target", path, len - 1); 2739 2740 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2741 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2742 goto out2; 2743 if (nd.ni_vp) { 2744 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2745 if (nd.ni_dvp == nd.ni_vp) 2746 vrele(nd.ni_dvp); 2747 else 2748 vput(nd.ni_dvp); 2749 vrele(nd.ni_vp); 2750 error = EEXIST; 2751 goto out2; 2752 } 2753 vattr_null(&vattr); 2754 vattr.va_type = VLNK; 2755 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2756 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2757 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2758 if (error == 0) 2759 vrele(nd.ni_vp); 2760 vput(nd.ni_dvp); 2761 out2: 2762 pathbuf_destroy(linkpb); 2763 out1: 2764 PNBUF_PUT(path); 2765 return (error); 2766 } 2767 2768 /* 2769 * Make a symbolic link. 2770 */ 2771 /* ARGSUSED */ 2772 int 2773 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2774 { 2775 /* { 2776 syscallarg(const char *) path; 2777 syscallarg(const char *) link; 2778 } */ 2779 2780 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2781 UIO_USERSPACE); 2782 } 2783 2784 int 2785 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2786 register_t *retval) 2787 { 2788 /* { 2789 syscallarg(const char *) path1; 2790 syscallarg(int) fd; 2791 syscallarg(const char *) path2; 2792 } */ 2793 2794 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2795 SCARG(uap, path2), UIO_USERSPACE); 2796 } 2797 2798 /* 2799 * Delete a whiteout from the filesystem. 
2800 */ 2801 /* ARGSUSED */ 2802 int 2803 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, 2804 register_t *retval) 2805 { 2806 /* { 2807 syscallarg(const char *) path; 2808 } */ 2809 int error; 2810 struct pathbuf *pb; 2811 struct nameidata nd; 2812 2813 error = pathbuf_copyin(SCARG(uap, path), &pb); 2814 if (error) { 2815 return error; 2816 } 2817 2818 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2819 error = namei(&nd); 2820 if (error) { 2821 pathbuf_destroy(pb); 2822 return (error); 2823 } 2824 2825 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2826 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2827 if (nd.ni_dvp == nd.ni_vp) 2828 vrele(nd.ni_dvp); 2829 else 2830 vput(nd.ni_dvp); 2831 if (nd.ni_vp) 2832 vrele(nd.ni_vp); 2833 pathbuf_destroy(pb); 2834 return (EEXIST); 2835 } 2836 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2837 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2838 vput(nd.ni_dvp); 2839 pathbuf_destroy(pb); 2840 return (error); 2841 } 2842 2843 /* 2844 * Delete a name from the filesystem. 
*/
/*
 * sys_unlink: system call handler taking a single user-space path.
 * Forwards to do_sys_unlinkat() with AT_FDCWD and no flags.
 */
/* ARGSUSED */
int
sys_unlink(struct lwp *l, const struct sys_unlink_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) path;
	} */

	return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0,
	    UIO_USERSPACE);
}

/*
 * sys_unlinkat: system call handler; forwards the descriptor, the
 * user-space path and the flag word to do_sys_unlinkat().
 */
int
sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) fd;
		syscallarg(const char *) path;
		syscallarg(int) flag;
	} */

	return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path),
	    SCARG(uap, flag), UIO_USERSPACE);
}

/*
 * do_sys_unlink: unlink without an lwp or directory descriptor; the
 * caller chooses the address space of arg through seg.
 */
int
do_sys_unlink(const char *arg, enum uio_seg seg)
{

	return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg);
}

/*
 * do_sys_unlinkat: common code for removing a name.
 *
 * arg is the path (in the address space given by seg), looked up for
 * DELETE via fd_nameiat() using fdat.  l may be NULL only when fdat is
 * AT_FDCWD (asserted).  flags is tested for AT_REMOVEDIR only.
 *
 * Returns 0 or an errno value.  The checks below run in this order, so
 * the first one that fails decides the error:
 *   EBUSY   - the vnode has VV_ROOT set, or is a directory with
 *             something mounted on it
 *   EINVAL  - the vnode is its own parent in the lookup result
 *   EPERM   - it is a directory and AT_REMOVEDIR was not given
 *   ENOTDIR - AT_REMOVEDIR was given and it is not a directory
 */
static int
do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags,
    enum uio_seg seg)
{
	struct vnode *vp;
	int error;
	struct pathbuf *pb;
	struct nameidata nd;
	const char *pathstring;

	KASSERT(l != NULL || fdat == AT_FDCWD);

	error = pathbuf_maybe_copyin(arg, seg, &pb);
	if (error) {
		return error;
	}
	/* Separate copy of the path string; only the veriexec check uses it. */
	pathstring = pathbuf_stringcopy_get(pb);
	if (pathstring == NULL) {
		pathbuf_destroy(pb);
		return ENOMEM;
	}

	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb);
	if ((error = fd_nameiat(l, fdat, &nd)) != 0)
		goto out;
	vp = nd.ni_vp;

	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if ((vp->v_vflag & VV_ROOT) != 0) {
		error = EBUSY;
		goto abort;
	}

	/* Neither can a directory that is currently a mount point. */
	if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) {
		error = EBUSY;
		goto abort;
	}

	/*
	 * No rmdir "." please.
	 */
	if (nd.ni_dvp == vp) {
		error = EINVAL;
		goto abort;
	}

	/*
	 * AT_REMOVEDIR is required to remove a directory
	 */
	if (vp->v_type == VDIR) {
		if (!(flags & AT_REMOVEDIR)) {
			error = EPERM;
			goto abort;
		} else {
			/*
			 * Only the parent is released here; vp is not
			 * touched again on this path.
			 */
			error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
			vput(nd.ni_dvp);
			goto out;
		}
	}

	/*
	 * Starting here we only deal with non directories.
	 */
	if (flags & AT_REMOVEDIR) {
		error = ENOTDIR;
		goto abort;
	}

#if NVERIEXEC > 0
	/* Handle remove requests for veriexec entries. */
	if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) {
		goto abort;
	}
#endif /* NVERIEXEC > 0 */

#ifdef FILEASSOC
	/* Result deliberately ignored. */
	(void)fileassoc_file_delete(vp);
#endif /* FILEASSOC */
	/* As with VOP_RMDIR above, only the parent is released afterwards. */
	error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vput(nd.ni_dvp);
	goto out;

	/*
	 * Failure after a successful lookup: abort the operation and drop
	 * both the parent and the leaf.
	 */
abort:
	VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	vput(vp);

	/* Common exit: release the path string copy and the pathbuf. */
out:
	pathbuf_stringcopy_put(pb, pathstring);
	pathbuf_destroy(pb);
	return (error);
}

/*
 * Reposition read/write file offset.
2980 */ 2981 int 2982 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2983 { 2984 /* { 2985 syscallarg(int) fd; 2986 syscallarg(int) pad; 2987 syscallarg(off_t) offset; 2988 syscallarg(int) whence; 2989 } */ 2990 file_t *fp; 2991 int error, fd; 2992 2993 switch (SCARG(uap, whence)) { 2994 case SEEK_CUR: 2995 case SEEK_END: 2996 case SEEK_SET: 2997 break; 2998 default: 2999 return EINVAL; 3000 } 3001 3002 fd = SCARG(uap, fd); 3003 3004 if ((fp = fd_getfile(fd)) == NULL) 3005 return (EBADF); 3006 3007 if (fp->f_ops->fo_seek == NULL) { 3008 error = ESPIPE; 3009 goto out; 3010 } 3011 3012 error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), 3013 SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); 3014 out: 3015 fd_putfile(fd); 3016 return (error); 3017 } 3018 3019 /* 3020 * Positional read system call. 3021 */ 3022 int 3023 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 3024 { 3025 /* { 3026 syscallarg(int) fd; 3027 syscallarg(void *) buf; 3028 syscallarg(size_t) nbyte; 3029 syscallarg(off_t) offset; 3030 } */ 3031 file_t *fp; 3032 off_t offset; 3033 int error, fd = SCARG(uap, fd); 3034 3035 if ((fp = fd_getfile(fd)) == NULL) 3036 return (EBADF); 3037 3038 if ((fp->f_flag & FREAD) == 0) { 3039 fd_putfile(fd); 3040 return (EBADF); 3041 } 3042 3043 if (fp->f_ops->fo_seek == NULL) { 3044 error = ESPIPE; 3045 goto out; 3046 } 3047 3048 offset = SCARG(uap, offset); 3049 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3050 if (error) 3051 goto out; 3052 3053 /* dofileread() will unuse the descriptor for us */ 3054 return dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3055 &offset, 0, retval); 3056 3057 out: 3058 fd_putfile(fd); 3059 return (error); 3060 } 3061 3062 /* 3063 * Positional scatter read system call. 
3064 */ 3065 int 3066 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, 3067 register_t *retval) 3068 { 3069 /* { 3070 syscallarg(int) fd; 3071 syscallarg(const struct iovec *) iovp; 3072 syscallarg(int) iovcnt; 3073 syscallarg(off_t) offset; 3074 } */ 3075 off_t offset = SCARG(uap, offset); 3076 3077 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 3078 SCARG(uap, iovcnt), &offset, 0, retval); 3079 } 3080 3081 /* 3082 * Positional write system call. 3083 */ 3084 int 3085 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, 3086 register_t *retval) 3087 { 3088 /* { 3089 syscallarg(int) fd; 3090 syscallarg(const void *) buf; 3091 syscallarg(size_t) nbyte; 3092 syscallarg(off_t) offset; 3093 } */ 3094 file_t *fp; 3095 off_t offset; 3096 int error, fd = SCARG(uap, fd); 3097 3098 if ((fp = fd_getfile(fd)) == NULL) 3099 return (EBADF); 3100 3101 if ((fp->f_flag & FWRITE) == 0) { 3102 fd_putfile(fd); 3103 return (EBADF); 3104 } 3105 3106 if (fp->f_ops->fo_seek == NULL) { 3107 error = ESPIPE; 3108 goto out; 3109 } 3110 3111 offset = SCARG(uap, offset); 3112 error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); 3113 if (error) 3114 goto out; 3115 3116 /* dofilewrite() will unuse the descriptor for us */ 3117 return dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 3118 &offset, 0, retval); 3119 3120 out: 3121 fd_putfile(fd); 3122 return (error); 3123 } 3124 3125 /* 3126 * Positional gather write system call. 3127 */ 3128 int 3129 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, 3130 register_t *retval) 3131 { 3132 /* { 3133 syscallarg(int) fd; 3134 syscallarg(const struct iovec *) iovp; 3135 syscallarg(int) iovcnt; 3136 syscallarg(off_t) offset; 3137 } */ 3138 off_t offset = SCARG(uap, offset); 3139 3140 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 3141 SCARG(uap, iovcnt), &offset, 0, retval); 3142 } 3143 3144 /* 3145 * Check access permissions. 
3146 */ 3147 int 3148 sys_access(struct lwp *l, const struct sys_access_args *uap, 3149 register_t *retval) 3150 { 3151 /* { 3152 syscallarg(const char *) path; 3153 syscallarg(int) flags; 3154 } */ 3155 3156 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 3157 SCARG(uap, flags), 0); 3158 } 3159 3160 int 3161 do_sys_accessat(struct lwp *l, int fdat, const char *path, 3162 int mode, int flags) 3163 { 3164 kauth_cred_t cred; 3165 struct vnode *vp; 3166 int error, nd_flag, vmode; 3167 struct pathbuf *pb; 3168 struct nameidata nd; 3169 3170 CTASSERT(F_OK == 0); 3171 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 3172 /* nonsense mode */ 3173 return EINVAL; 3174 } 3175 3176 nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; 3177 if (flags & AT_SYMLINK_NOFOLLOW) 3178 nd_flag &= ~FOLLOW; 3179 3180 error = pathbuf_copyin(path, &pb); 3181 if (error) 3182 return error; 3183 3184 NDINIT(&nd, LOOKUP, nd_flag, pb); 3185 3186 /* Override default credentials */ 3187 if (!(flags & AT_EACCESS)) { 3188 cred = kauth_cred_dup(l->l_cred); 3189 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 3190 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 3191 } else 3192 cred = l->l_cred; 3193 nd.ni_cnd.cn_cred = cred; 3194 3195 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3196 pathbuf_destroy(pb); 3197 goto out; 3198 } 3199 vp = nd.ni_vp; 3200 pathbuf_destroy(pb); 3201 3202 /* Flags == 0 means only check for existence. 
*/ 3203 if (mode) { 3204 vmode = 0; 3205 if (mode & R_OK) 3206 vmode |= VREAD; 3207 if (mode & W_OK) 3208 vmode |= VWRITE; 3209 if (mode & X_OK) 3210 vmode |= VEXEC; 3211 3212 error = VOP_ACCESS(vp, vmode, cred); 3213 if (!error && (vmode & VWRITE)) 3214 error = vn_writechk(vp); 3215 } 3216 vput(vp); 3217 out: 3218 if (!(flags & AT_EACCESS)) 3219 kauth_cred_free(cred); 3220 return (error); 3221 } 3222 3223 int 3224 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3225 register_t *retval) 3226 { 3227 /* { 3228 syscallarg(int) fd; 3229 syscallarg(const char *) path; 3230 syscallarg(int) amode; 3231 syscallarg(int) flag; 3232 } */ 3233 3234 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3235 SCARG(uap, amode), SCARG(uap, flag)); 3236 } 3237 3238 /* 3239 * Common code for all sys_stat functions, including compat versions. 3240 */ 3241 int 3242 do_sys_stat(const char *userpath, unsigned int nd_flag, struct stat *sb) 3243 { 3244 3245 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3246 } 3247 3248 int 3249 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3250 unsigned int nd_flag, struct stat *sb) 3251 { 3252 int error; 3253 struct pathbuf *pb; 3254 struct nameidata nd; 3255 3256 KASSERT(l != NULL || fdat == AT_FDCWD); 3257 3258 error = pathbuf_copyin(userpath, &pb); 3259 if (error) { 3260 return error; 3261 } 3262 3263 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3264 3265 error = fd_nameiat(l, fdat, &nd); 3266 if (error != 0) { 3267 pathbuf_destroy(pb); 3268 return error; 3269 } 3270 error = vn_stat(nd.ni_vp, sb); 3271 vput(nd.ni_vp); 3272 pathbuf_destroy(pb); 3273 return error; 3274 } 3275 3276 /* 3277 * Get file status; this version follows links. 
3278 */ 3279 /* ARGSUSED */ 3280 int 3281 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, 3282 register_t *retval) 3283 { 3284 /* { 3285 syscallarg(const char *) path; 3286 syscallarg(struct stat *) ub; 3287 } */ 3288 struct stat sb; 3289 int error; 3290 3291 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3292 if (error) 3293 return error; 3294 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3295 } 3296 3297 /* 3298 * Get file status; this version does not follow links. 3299 */ 3300 /* ARGSUSED */ 3301 int 3302 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, 3303 register_t *retval) 3304 { 3305 /* { 3306 syscallarg(const char *) path; 3307 syscallarg(struct stat *) ub; 3308 } */ 3309 struct stat sb; 3310 int error; 3311 3312 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3313 if (error) 3314 return error; 3315 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3316 } 3317 3318 int 3319 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3320 register_t *retval) 3321 { 3322 /* { 3323 syscallarg(int) fd; 3324 syscallarg(const char *) path; 3325 syscallarg(struct stat *) buf; 3326 syscallarg(int) flag; 3327 } */ 3328 unsigned int nd_flag; 3329 struct stat sb; 3330 int error; 3331 3332 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3333 nd_flag = NOFOLLOW; 3334 else 3335 nd_flag = FOLLOW; 3336 3337 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3338 &sb); 3339 if (error) 3340 return error; 3341 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3342 } 3343 3344 static int 3345 kern_pathconf(register_t *retval, const char *path, int name, int flag) 3346 { 3347 int error; 3348 struct pathbuf *pb; 3349 struct nameidata nd; 3350 3351 error = pathbuf_copyin(path, &pb); 3352 if (error) { 3353 return error; 3354 } 3355 NDINIT(&nd, LOOKUP, flag | LOCKLEAF | TRYEMULROOT, pb); 3356 if ((error = namei(&nd)) != 0) { 3357 pathbuf_destroy(pb); 3358 return error; 3359 } 3360 error 
= VOP_PATHCONF(nd.ni_vp, name, retval); 3361 vput(nd.ni_vp); 3362 pathbuf_destroy(pb); 3363 return error; 3364 } 3365 3366 /* 3367 * Get configurable pathname variables. 3368 */ 3369 /* ARGSUSED */ 3370 int 3371 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, 3372 register_t *retval) 3373 { 3374 /* { 3375 syscallarg(const char *) path; 3376 syscallarg(int) name; 3377 } */ 3378 3379 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3380 FOLLOW); 3381 } 3382 3383 /* ARGSUSED */ 3384 int 3385 sys_lpathconf(struct lwp *l, const struct sys_lpathconf_args *uap, 3386 register_t *retval) 3387 { 3388 /* { 3389 syscallarg(const char *) path; 3390 syscallarg(int) name; 3391 } */ 3392 3393 return kern_pathconf(retval, SCARG(uap, path), SCARG(uap, name), 3394 NOFOLLOW); 3395 } 3396 3397 /* 3398 * Return target name of a symbolic link. 3399 */ 3400 /* ARGSUSED */ 3401 int 3402 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3403 register_t *retval) 3404 { 3405 /* { 3406 syscallarg(const char *) path; 3407 syscallarg(char *) buf; 3408 syscallarg(size_t) count; 3409 } */ 3410 3411 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3412 SCARG(uap, buf), SCARG(uap, count), retval); 3413 } 3414 3415 static int 3416 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3417 size_t count, register_t *retval) 3418 { 3419 struct vnode *vp; 3420 struct iovec aiov; 3421 struct uio auio; 3422 int error; 3423 struct pathbuf *pb; 3424 struct nameidata nd; 3425 3426 error = pathbuf_copyin(path, &pb); 3427 if (error) { 3428 return error; 3429 } 3430 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, 3431 pb); 3432 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3433 pathbuf_destroy(pb); 3434 return error; 3435 } 3436 vp = nd.ni_vp; 3437 pathbuf_destroy(pb); 3438 if (vp->v_type != VLNK) 3439 error = EINVAL; 3440 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3441 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) 
== 0) { 3442 aiov.iov_base = buf; 3443 aiov.iov_len = count; 3444 auio.uio_iov = &aiov; 3445 auio.uio_iovcnt = 1; 3446 auio.uio_offset = 0; 3447 auio.uio_rw = UIO_READ; 3448 KASSERT(l == curlwp); 3449 auio.uio_vmspace = l->l_proc->p_vmspace; 3450 auio.uio_resid = count; 3451 if ((error = VOP_READLINK(vp, &auio, l->l_cred)) == 0) 3452 *retval = count - auio.uio_resid; 3453 } 3454 vput(vp); 3455 return (error); 3456 } 3457 3458 int 3459 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3460 register_t *retval) 3461 { 3462 /* { 3463 syscallarg(int) fd; 3464 syscallarg(const char *) path; 3465 syscallarg(char *) buf; 3466 syscallarg(size_t) bufsize; 3467 } */ 3468 3469 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3470 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3471 } 3472 3473 /* 3474 * Change flags of a file given a path name. 3475 */ 3476 /* ARGSUSED */ 3477 int 3478 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, 3479 register_t *retval) 3480 { 3481 /* { 3482 syscallarg(const char *) path; 3483 syscallarg(u_long) flags; 3484 } */ 3485 struct vnode *vp; 3486 int error; 3487 3488 error = namei_simple_user(SCARG(uap, path), 3489 NSM_FOLLOW_TRYEMULROOT, &vp); 3490 if (error != 0) 3491 return (error); 3492 error = change_flags(vp, SCARG(uap, flags), l); 3493 vput(vp); 3494 return (error); 3495 } 3496 3497 /* 3498 * Change flags of a file given a file descriptor. 
3499 */ 3500 /* ARGSUSED */ 3501 int 3502 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, 3503 register_t *retval) 3504 { 3505 /* { 3506 syscallarg(int) fd; 3507 syscallarg(u_long) flags; 3508 } */ 3509 struct vnode *vp; 3510 file_t *fp; 3511 int error; 3512 3513 /* fd_getvnode() will use the descriptor for us */ 3514 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3515 return (error); 3516 vp = fp->f_vnode; 3517 error = change_flags(vp, SCARG(uap, flags), l); 3518 VOP_UNLOCK(vp); 3519 fd_putfile(SCARG(uap, fd)); 3520 return (error); 3521 } 3522 3523 /* 3524 * Change flags of a file given a path name; this version does 3525 * not follow links. 3526 */ 3527 int 3528 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, 3529 register_t *retval) 3530 { 3531 /* { 3532 syscallarg(const char *) path; 3533 syscallarg(u_long) flags; 3534 } */ 3535 struct vnode *vp; 3536 int error; 3537 3538 error = namei_simple_user(SCARG(uap, path), 3539 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3540 if (error != 0) 3541 return (error); 3542 error = change_flags(vp, SCARG(uap, flags), l); 3543 vput(vp); 3544 return (error); 3545 } 3546 3547 /* 3548 * Common routine to change flags of a file. 3549 */ 3550 int 3551 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3552 { 3553 struct vattr vattr; 3554 int error; 3555 3556 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3557 3558 vattr_null(&vattr); 3559 vattr.va_flags = flags; 3560 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3561 3562 return (error); 3563 } 3564 3565 /* 3566 * Change mode of a file given path name; this version follows links. 
3567 */ 3568 /* ARGSUSED */ 3569 int 3570 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3571 { 3572 /* { 3573 syscallarg(const char *) path; 3574 syscallarg(int) mode; 3575 } */ 3576 3577 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3578 SCARG(uap, mode), 0); 3579 } 3580 3581 int 3582 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3583 { 3584 int error; 3585 struct vnode *vp; 3586 namei_simple_flags_t ns_flag; 3587 3588 if (flags & AT_SYMLINK_NOFOLLOW) 3589 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3590 else 3591 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3592 3593 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3594 if (error != 0) 3595 return error; 3596 3597 error = change_mode(vp, mode, l); 3598 3599 vrele(vp); 3600 3601 return (error); 3602 } 3603 3604 /* 3605 * Change mode of a file given a file descriptor. 3606 */ 3607 /* ARGSUSED */ 3608 int 3609 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, 3610 register_t *retval) 3611 { 3612 /* { 3613 syscallarg(int) fd; 3614 syscallarg(int) mode; 3615 } */ 3616 file_t *fp; 3617 int error; 3618 3619 /* fd_getvnode() will use the descriptor for us */ 3620 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3621 return (error); 3622 error = change_mode(fp->f_vnode, SCARG(uap, mode), l); 3623 fd_putfile(SCARG(uap, fd)); 3624 return (error); 3625 } 3626 3627 int 3628 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3629 register_t *retval) 3630 { 3631 /* { 3632 syscallarg(int) fd; 3633 syscallarg(const char *) path; 3634 syscallarg(int) mode; 3635 syscallarg(int) flag; 3636 } */ 3637 3638 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3639 SCARG(uap, mode), SCARG(uap, flag)); 3640 } 3641 3642 /* 3643 * Change mode of a file given path name; this version does not follow links. 
3644 */ 3645 /* ARGSUSED */ 3646 int 3647 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, 3648 register_t *retval) 3649 { 3650 /* { 3651 syscallarg(const char *) path; 3652 syscallarg(int) mode; 3653 } */ 3654 int error; 3655 struct vnode *vp; 3656 3657 error = namei_simple_user(SCARG(uap, path), 3658 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3659 if (error != 0) 3660 return (error); 3661 3662 error = change_mode(vp, SCARG(uap, mode), l); 3663 3664 vrele(vp); 3665 return (error); 3666 } 3667 3668 /* 3669 * Common routine to set mode given a vnode. 3670 */ 3671 static int 3672 change_mode(struct vnode *vp, int mode, struct lwp *l) 3673 { 3674 struct vattr vattr; 3675 int error; 3676 3677 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3678 vattr_null(&vattr); 3679 vattr.va_mode = mode & ALLPERMS; 3680 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3681 VOP_UNLOCK(vp); 3682 return (error); 3683 } 3684 3685 /* 3686 * Set ownership given a path name; this version follows links. 3687 */ 3688 /* ARGSUSED */ 3689 int 3690 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3691 { 3692 /* { 3693 syscallarg(const char *) path; 3694 syscallarg(uid_t) uid; 3695 syscallarg(gid_t) gid; 3696 } */ 3697 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3698 SCARG(uap, gid), 0); 3699 } 3700 3701 int 3702 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3703 gid_t gid, int flags) 3704 { 3705 int error; 3706 struct vnode *vp; 3707 namei_simple_flags_t ns_flag; 3708 3709 if (flags & AT_SYMLINK_NOFOLLOW) 3710 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3711 else 3712 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3713 3714 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3715 if (error != 0) 3716 return error; 3717 3718 error = change_owner(vp, uid, gid, l, 0); 3719 3720 vrele(vp); 3721 3722 return (error); 3723 } 3724 3725 /* 3726 * Set ownership given a path name; this version follows links. 3727 * Provides POSIX semantics. 
3728 */ 3729 /* ARGSUSED */ 3730 int 3731 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, 3732 register_t *retval) 3733 { 3734 /* { 3735 syscallarg(const char *) path; 3736 syscallarg(uid_t) uid; 3737 syscallarg(gid_t) gid; 3738 } */ 3739 int error; 3740 struct vnode *vp; 3741 3742 error = namei_simple_user(SCARG(uap, path), 3743 NSM_FOLLOW_TRYEMULROOT, &vp); 3744 if (error != 0) 3745 return (error); 3746 3747 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3748 3749 vrele(vp); 3750 return (error); 3751 } 3752 3753 /* 3754 * Set ownership given a file descriptor. 3755 */ 3756 /* ARGSUSED */ 3757 int 3758 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, 3759 register_t *retval) 3760 { 3761 /* { 3762 syscallarg(int) fd; 3763 syscallarg(uid_t) uid; 3764 syscallarg(gid_t) gid; 3765 } */ 3766 int error; 3767 file_t *fp; 3768 3769 /* fd_getvnode() will use the descriptor for us */ 3770 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3771 return (error); 3772 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3773 l, 0); 3774 fd_putfile(SCARG(uap, fd)); 3775 return (error); 3776 } 3777 3778 int 3779 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3780 register_t *retval) 3781 { 3782 /* { 3783 syscallarg(int) fd; 3784 syscallarg(const char *) path; 3785 syscallarg(uid_t) owner; 3786 syscallarg(gid_t) group; 3787 syscallarg(int) flag; 3788 } */ 3789 3790 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3791 SCARG(uap, owner), SCARG(uap, group), 3792 SCARG(uap, flag)); 3793 } 3794 3795 /* 3796 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 
3797 */ 3798 /* ARGSUSED */ 3799 int 3800 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, 3801 register_t *retval) 3802 { 3803 /* { 3804 syscallarg(int) fd; 3805 syscallarg(uid_t) uid; 3806 syscallarg(gid_t) gid; 3807 } */ 3808 int error; 3809 file_t *fp; 3810 3811 /* fd_getvnode() will use the descriptor for us */ 3812 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3813 return (error); 3814 error = change_owner(fp->f_vnode, SCARG(uap, uid), SCARG(uap, gid), 3815 l, 1); 3816 fd_putfile(SCARG(uap, fd)); 3817 return (error); 3818 } 3819 3820 /* 3821 * Set ownership given a path name; this version does not follow links. 3822 */ 3823 /* ARGSUSED */ 3824 int 3825 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, 3826 register_t *retval) 3827 { 3828 /* { 3829 syscallarg(const char *) path; 3830 syscallarg(uid_t) uid; 3831 syscallarg(gid_t) gid; 3832 } */ 3833 int error; 3834 struct vnode *vp; 3835 3836 error = namei_simple_user(SCARG(uap, path), 3837 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3838 if (error != 0) 3839 return (error); 3840 3841 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3842 3843 vrele(vp); 3844 return (error); 3845 } 3846 3847 /* 3848 * Set ownership given a path name; this version does not follow links. 3849 * Provides POSIX/XPG semantics. 3850 */ 3851 /* ARGSUSED */ 3852 int 3853 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, 3854 register_t *retval) 3855 { 3856 /* { 3857 syscallarg(const char *) path; 3858 syscallarg(uid_t) uid; 3859 syscallarg(gid_t) gid; 3860 } */ 3861 int error; 3862 struct vnode *vp; 3863 3864 error = namei_simple_user(SCARG(uap, path), 3865 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3866 if (error != 0) 3867 return (error); 3868 3869 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3870 3871 vrele(vp); 3872 return (error); 3873 } 3874 3875 /* 3876 * Common routine to set ownership given a vnode. 
*/
/*
 * change_owner: set the owner and/or group of vp.
 *
 * A uid or gid whose value, cast to int, equals -1 means "leave
 * unchanged" (see the CHANGED() macro below).  posix_semantics selects
 * how the set-user-id and set-group-id mode bits are treated; the two
 * policies are described at the branches.  The vnode is locked
 * exclusively for the duration and unlocked before return.  Returns 0
 * or the error from VOP_GETATTR()/VOP_SETATTR().
 */
static int
change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l,
    int posix_semantics)
{
	struct vattr vattr;
	mode_t newmode;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* Fetch the current mode so the set-id bits can be examined. */
	if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0)
		goto out;

	/* True when the caller supplied a real id rather than -1. */
#define CHANGED(x) ((int)(x) != -1)
	newmode = vattr.va_mode;
	if (posix_semantics) {
		/*
		 * POSIX/XPG semantics: if the caller is not the super-user,
		 * clear set-user-id and set-group-id bits.  Both POSIX and
		 * the XPG consider the behaviour for calls by the super-user
		 * implementation-defined; we leave the set-user-id and set-
		 * group-id settings intact in that case.
		 */
		if (vattr.va_mode & S_ISUID) {
			/* A non-zero kauth result means the bit is cleared. */
			if (kauth_authorize_vnode(l->l_cred,
			    KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0)
				newmode &= ~S_ISUID;
		}
		if (vattr.va_mode & S_ISGID) {
			/* Same decision for the set-group-id bit. */
			if (kauth_authorize_vnode(l->l_cred,
			    KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0)
				newmode &= ~S_ISGID;
		}
	} else {
		/*
		 * NetBSD semantics: when changing owner and/or group,
		 * clear the respective bit(s).
		 */
		if (CHANGED(uid))
			newmode &= ~S_ISUID;
		if (CHANGED(gid))
			newmode &= ~S_ISGID;
	}
	/* Update va_mode iff altered. */
	if (vattr.va_mode == newmode)
		newmode = VNOVAL;

	/* Reuse vattr for the update: only uid, gid and mode are filled in. */
	vattr_null(&vattr);
	vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL;
	vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL;
	vattr.va_mode = newmode;
	error = VOP_SETATTR(vp, &vattr, l->l_cred);
#undef CHANGED

out:
	VOP_UNLOCK(vp);
	return (error);
}

/*
 * Set the access and modification times given a path name; this
 * version follows links.
3939 */ 3940 /* ARGSUSED */ 3941 int 3942 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3943 register_t *retval) 3944 { 3945 /* { 3946 syscallarg(const char *) path; 3947 syscallarg(const struct timeval *) tptr; 3948 } */ 3949 3950 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3951 SCARG(uap, tptr), UIO_USERSPACE); 3952 } 3953 3954 /* 3955 * Set the access and modification times given a file descriptor. 3956 */ 3957 /* ARGSUSED */ 3958 int 3959 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3960 register_t *retval) 3961 { 3962 /* { 3963 syscallarg(int) fd; 3964 syscallarg(const struct timeval *) tptr; 3965 } */ 3966 int error; 3967 file_t *fp; 3968 3969 /* fd_getvnode() will use the descriptor for us */ 3970 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3971 return (error); 3972 error = do_sys_utimes(l, fp->f_vnode, NULL, 0, SCARG(uap, tptr), 3973 UIO_USERSPACE); 3974 fd_putfile(SCARG(uap, fd)); 3975 return (error); 3976 } 3977 3978 int 3979 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3980 register_t *retval) 3981 { 3982 /* { 3983 syscallarg(int) fd; 3984 syscallarg(const struct timespec *) tptr; 3985 } */ 3986 int error; 3987 file_t *fp; 3988 3989 /* fd_getvnode() will use the descriptor for us */ 3990 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3991 return (error); 3992 error = do_sys_utimensat(l, AT_FDCWD, fp->f_vnode, NULL, 0, 3993 SCARG(uap, tptr), UIO_USERSPACE); 3994 fd_putfile(SCARG(uap, fd)); 3995 return (error); 3996 } 3997 3998 /* 3999 * Set the access and modification times given a path name; this 4000 * version does not follow links. 
4001 */ 4002 int 4003 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 4004 register_t *retval) 4005 { 4006 /* { 4007 syscallarg(const char *) path; 4008 syscallarg(const struct timeval *) tptr; 4009 } */ 4010 4011 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 4012 SCARG(uap, tptr), UIO_USERSPACE); 4013 } 4014 4015 int 4016 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 4017 register_t *retval) 4018 { 4019 /* { 4020 syscallarg(int) fd; 4021 syscallarg(const char *) path; 4022 syscallarg(const struct timespec *) tptr; 4023 syscallarg(int) flag; 4024 } */ 4025 int follow; 4026 const struct timespec *tptr; 4027 int error; 4028 4029 tptr = SCARG(uap, tptr); 4030 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 4031 4032 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 4033 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 4034 4035 return error; 4036 } 4037 4038 /* 4039 * Common routine to set access and modification times given a vnode. 4040 */ 4041 int 4042 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 4043 const struct timespec *tptr, enum uio_seg seg) 4044 { 4045 4046 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 4047 } 4048 4049 int 4050 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 4051 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 4052 { 4053 struct vattr vattr; 4054 int error, dorele = 0; 4055 namei_simple_flags_t sflags; 4056 bool vanull, setbirthtime; 4057 struct timespec ts[2]; 4058 4059 KASSERT(l != NULL || fdat == AT_FDCWD); 4060 4061 /* 4062 * I have checked all callers and they pass either FOLLOW, 4063 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 4064 * is 0. More to the point, they don't pass anything else. 4065 * Let's keep it that way at least until the namei interfaces 4066 * are fully sanitized. 
4067 */ 4068 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 4069 sflags = (flag == FOLLOW) ? 4070 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 4071 4072 if (tptr == NULL) { 4073 vanull = true; 4074 nanotime(&ts[0]); 4075 ts[1] = ts[0]; 4076 } else { 4077 vanull = false; 4078 if (seg != UIO_SYSSPACE) { 4079 error = copyin(tptr, ts, sizeof (ts)); 4080 if (error != 0) 4081 return error; 4082 } else { 4083 ts[0] = tptr[0]; 4084 ts[1] = tptr[1]; 4085 } 4086 } 4087 4088 if (ts[0].tv_nsec == UTIME_NOW) { 4089 nanotime(&ts[0]); 4090 if (ts[1].tv_nsec == UTIME_NOW) { 4091 vanull = true; 4092 ts[1] = ts[0]; 4093 } 4094 } else if (ts[1].tv_nsec == UTIME_NOW) 4095 nanotime(&ts[1]); 4096 4097 if (vp == NULL) { 4098 /* note: SEG describes TPTR, not PATH; PATH is always user */ 4099 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 4100 if (error != 0) 4101 return error; 4102 dorele = 1; 4103 } 4104 4105 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4106 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 4107 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 4108 vattr_null(&vattr); 4109 4110 if (ts[0].tv_nsec != UTIME_OMIT) 4111 vattr.va_atime = ts[0]; 4112 4113 if (ts[1].tv_nsec != UTIME_OMIT) { 4114 vattr.va_mtime = ts[1]; 4115 if (setbirthtime) 4116 vattr.va_birthtime = ts[1]; 4117 } 4118 4119 if (vanull) 4120 vattr.va_vaflags |= VA_UTIMES_NULL; 4121 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4122 VOP_UNLOCK(vp); 4123 4124 if (dorele != 0) 4125 vrele(vp); 4126 4127 return error; 4128 } 4129 4130 int 4131 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 4132 const struct timeval *tptr, enum uio_seg seg) 4133 { 4134 struct timespec ts[2]; 4135 struct timespec *tsptr = NULL; 4136 int error; 4137 4138 if (tptr != NULL) { 4139 struct timeval tv[2]; 4140 4141 if (seg != UIO_SYSSPACE) { 4142 error = copyin(tptr, tv, sizeof(tv)); 4143 if (error != 0) 4144 return error; 4145 tptr = tv; 4146 } 4147 4148 if ((tptr[0].tv_usec == UTIME_NOW) || 4149 
(tptr[0].tv_usec == UTIME_OMIT)) 4150 ts[0].tv_nsec = tptr[0].tv_usec; 4151 else { 4152 if (tptr[0].tv_usec < 0 || tptr[0].tv_usec >= 1000000) 4153 return EINVAL; 4154 4155 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 4156 } 4157 4158 if ((tptr[1].tv_usec == UTIME_NOW) || 4159 (tptr[1].tv_usec == UTIME_OMIT)) 4160 ts[1].tv_nsec = tptr[1].tv_usec; 4161 else { 4162 if (tptr[1].tv_usec < 0 || tptr[1].tv_usec >= 1000000) 4163 return EINVAL; 4164 4165 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 4166 } 4167 4168 tsptr = &ts[0]; 4169 } 4170 4171 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 4172 } 4173 4174 /* 4175 * Truncate a file given its path name. 4176 */ 4177 /* ARGSUSED */ 4178 int 4179 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, 4180 register_t *retval) 4181 { 4182 /* { 4183 syscallarg(const char *) path; 4184 syscallarg(int) pad; 4185 syscallarg(off_t) length; 4186 } */ 4187 struct vnode *vp; 4188 struct vattr vattr; 4189 int error; 4190 4191 if (SCARG(uap, length) < 0) 4192 return EINVAL; 4193 4194 error = namei_simple_user(SCARG(uap, path), 4195 NSM_FOLLOW_TRYEMULROOT, &vp); 4196 if (error != 0) 4197 return (error); 4198 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4199 if (vp->v_type == VDIR) 4200 error = EISDIR; 4201 else if ((error = vn_writechk(vp)) == 0 && 4202 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 4203 vattr_null(&vattr); 4204 vattr.va_size = SCARG(uap, length); 4205 error = VOP_SETATTR(vp, &vattr, l->l_cred); 4206 } 4207 vput(vp); 4208 return (error); 4209 } 4210 4211 /* 4212 * Truncate a file given a file descriptor. 
4213 */ 4214 /* ARGSUSED */ 4215 int 4216 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, 4217 register_t *retval) 4218 { 4219 /* { 4220 syscallarg(int) fd; 4221 syscallarg(int) pad; 4222 syscallarg(off_t) length; 4223 } */ 4224 file_t *fp; 4225 int error, fd = SCARG(uap, fd); 4226 4227 fp = fd_getfile(fd); 4228 if (fp == NULL) 4229 return EBADF; 4230 if (fp->f_ops->fo_truncate == NULL) 4231 error = EOPNOTSUPP; 4232 else 4233 error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length)); 4234 4235 fd_putfile(fd); 4236 return error; 4237 } 4238 4239 /* 4240 * Sync an open file. 4241 */ 4242 /* ARGSUSED */ 4243 int 4244 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 4245 { 4246 /* { 4247 syscallarg(int) fd; 4248 } */ 4249 struct vnode *vp; 4250 file_t *fp; 4251 int error; 4252 4253 /* fd_getvnode() will use the descriptor for us */ 4254 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4255 return (error); 4256 vp = fp->f_vnode; 4257 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4258 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 4259 VOP_UNLOCK(vp); 4260 fd_putfile(SCARG(uap, fd)); 4261 return (error); 4262 } 4263 4264 /* 4265 * Sync a range of file data. API modeled after that found in AIX. 4266 * 4267 * FDATASYNC indicates that we need only save enough metadata to be able 4268 * to re-read the written data. 
4269 */ 4270 /* ARGSUSED */ 4271 int 4272 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, 4273 register_t *retval) 4274 { 4275 /* { 4276 syscallarg(int) fd; 4277 syscallarg(int) flags; 4278 syscallarg(off_t) start; 4279 syscallarg(off_t) length; 4280 } */ 4281 struct vnode *vp; 4282 file_t *fp; 4283 int flags, nflags; 4284 off_t s, e, len; 4285 int error; 4286 4287 /* fd_getvnode() will use the descriptor for us */ 4288 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4289 return (error); 4290 4291 if ((fp->f_flag & FWRITE) == 0) { 4292 error = EBADF; 4293 goto out; 4294 } 4295 4296 flags = SCARG(uap, flags); 4297 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4298 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4299 error = EINVAL; 4300 goto out; 4301 } 4302 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4303 if (flags & FDATASYNC) 4304 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4305 else 4306 nflags = FSYNC_WAIT; 4307 if (flags & FDISKSYNC) 4308 nflags |= FSYNC_CACHE; 4309 4310 len = SCARG(uap, length); 4311 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4312 if (len) { 4313 s = SCARG(uap, start); 4314 if (s < 0 || len < 0 || len > OFF_T_MAX - s) { 4315 error = EINVAL; 4316 goto out; 4317 } 4318 e = s + len; 4319 KASSERT(s <= e); 4320 } else { 4321 e = 0; 4322 s = 0; 4323 } 4324 4325 vp = fp->f_vnode; 4326 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4327 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4328 VOP_UNLOCK(vp); 4329 out: 4330 fd_putfile(SCARG(uap, fd)); 4331 return (error); 4332 } 4333 4334 /* 4335 * Sync the data of an open file. 
4336 */ 4337 /* ARGSUSED */ 4338 int 4339 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, 4340 register_t *retval) 4341 { 4342 /* { 4343 syscallarg(int) fd; 4344 } */ 4345 struct vnode *vp; 4346 file_t *fp; 4347 int error; 4348 4349 /* fd_getvnode() will use the descriptor for us */ 4350 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4351 return (error); 4352 vp = fp->f_vnode; 4353 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4354 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4355 VOP_UNLOCK(vp); 4356 fd_putfile(SCARG(uap, fd)); 4357 return (error); 4358 } 4359 4360 /* 4361 * Rename files, (standard) BSD semantics frontend. 4362 */ 4363 /* ARGSUSED */ 4364 int 4365 sys_rename(struct lwp *l, const struct sys_rename_args *uap, 4366 register_t *retval) 4367 { 4368 /* { 4369 syscallarg(const char *) from; 4370 syscallarg(const char *) to; 4371 } */ 4372 4373 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4374 SCARG(uap, to), UIO_USERSPACE, 0); 4375 } 4376 4377 int 4378 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4379 register_t *retval) 4380 { 4381 /* { 4382 syscallarg(int) fromfd; 4383 syscallarg(const char *) from; 4384 syscallarg(int) tofd; 4385 syscallarg(const char *) to; 4386 } */ 4387 4388 return do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4389 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0); 4390 } 4391 4392 /* 4393 * Rename files, POSIX semantics frontend. 4394 */ 4395 /* ARGSUSED */ 4396 int 4397 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, 4398 register_t *retval) 4399 { 4400 /* { 4401 syscallarg(const char *) from; 4402 syscallarg(const char *) to; 4403 } */ 4404 4405 return do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4406 SCARG(uap, to), UIO_USERSPACE, 1); 4407 } 4408 4409 /* 4410 * Rename files. Source and destination must either both be directories, 4411 * or both not be directories. 
If target is a directory, it must be empty. 4412 * If `from' and `to' refer to the same object, the value of the `retain' 4413 * argument is used to determine whether `from' will be 4414 * 4415 * (retain == 0) deleted unless `from' and `to' refer to the same 4416 * object in the file system's name space (BSD). 4417 * (retain == 1) always retained (POSIX). 4418 * 4419 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4420 */ 4421 int 4422 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4423 { 4424 4425 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, 4426 retain); 4427 } 4428 4429 static int 4430 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4431 const char *to, enum uio_seg seg, int retain) 4432 { 4433 struct pathbuf *fpb, *tpb; 4434 struct nameidata fnd, tnd; 4435 struct vnode *fdvp, *fvp; 4436 struct vnode *tdvp, *tvp; 4437 struct mount *mp, *tmp; 4438 int error; 4439 4440 KASSERT(l != NULL || fromfd == AT_FDCWD); 4441 KASSERT(l != NULL || tofd == AT_FDCWD); 4442 4443 error = pathbuf_maybe_copyin(from, seg, &fpb); 4444 if (error) 4445 goto out0; 4446 KASSERT(fpb != NULL); 4447 4448 error = pathbuf_maybe_copyin(to, seg, &tpb); 4449 if (error) 4450 goto out1; 4451 KASSERT(tpb != NULL); 4452 4453 /* 4454 * Lookup from. 4455 * 4456 * XXX LOCKPARENT is wrong because we don't actually want it 4457 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4458 * insane, so for the time being we need to leave it like this. 4459 */ 4460 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT), fpb); 4461 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4462 goto out2; 4463 4464 /* 4465 * Pull out the important results of the lookup, fdvp and fvp. 4466 * Of course, fvp is bogus because we're about to unlock fdvp. 
4467 */ 4468 fdvp = fnd.ni_dvp; 4469 fvp = fnd.ni_vp; 4470 mp = fdvp->v_mount; 4471 KASSERT(fdvp != NULL); 4472 KASSERT(fvp != NULL); 4473 KASSERT(fdvp == fvp || VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE); 4474 /* 4475 * Bracket the operation with fstrans_start()/fstrans_done(). 4476 * 4477 * Inside the bracket this file system cannot be unmounted so 4478 * a vnode on this file system cannot change its v_mount. 4479 * A vnode on another file system may still change to dead mount. 4480 */ 4481 fstrans_start(mp); 4482 4483 /* 4484 * Make sure neither fdvp nor fvp is locked. 4485 */ 4486 if (fdvp != fvp) 4487 VOP_UNLOCK(fdvp); 4488 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4489 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4490 4491 /* 4492 * Reject renaming `.' and `..'. Can't do this until after 4493 * namei because we need namei's parsing to find the final 4494 * component name. (namei should just leave us with the final 4495 * component name and not look it up itself, but anyway...) 4496 * 4497 * This was here before because we used to relookup from 4498 * instead of to and relookup requires the caller to check 4499 * this, but now file systems may depend on this check, so we 4500 * must retain it until the file systems are all rototilled. 4501 */ 4502 if ((fnd.ni_cnd.cn_namelen == 1 && 4503 fnd.ni_cnd.cn_nameptr[0] == '.') || 4504 (fnd.ni_cnd.cn_namelen == 2 && 4505 fnd.ni_cnd.cn_nameptr[0] == '.' && 4506 fnd.ni_cnd.cn_nameptr[1] == '.')) { 4507 error = EINVAL; /* XXX EISDIR? */ 4508 goto abort0; 4509 } 4510 4511 /* 4512 * Lookup to. 4513 * 4514 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4515 * fvp here to decide whether to add CREATEDIR is a load of 4516 * bollocks because fvp might be the wrong node by now, since 4517 * fdvp is unlocked. 4518 * 4519 * XXX Why not pass CREATEDIR always? 4520 */ 4521 NDINIT(&tnd, RENAME, 4522 (LOCKPARENT | NOCACHE | TRYEMULROOT | 4523 ((fvp->v_type == VDIR)? 
CREATEDIR : 0)), 4524 tpb); 4525 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4526 goto abort0; 4527 4528 /* 4529 * Pull out the important results of the lookup, tdvp and tvp. 4530 * Of course, tvp is bogus because we're about to unlock tdvp. 4531 */ 4532 tdvp = tnd.ni_dvp; 4533 tvp = tnd.ni_vp; 4534 KASSERT(tdvp != NULL); 4535 KASSERT(tdvp == tvp || VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4536 4537 if (fvp->v_type == VDIR) 4538 tnd.ni_cnd.cn_flags |= WILLBEDIR; 4539 /* 4540 * Make sure neither tdvp nor tvp is locked. 4541 */ 4542 if (tdvp != tvp) 4543 VOP_UNLOCK(tdvp); 4544 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4545 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4546 4547 /* 4548 * Reject renaming onto `.' or `..'. relookup is unhappy with 4549 * these, which is why we must do this here. Once upon a time 4550 * we relooked up from instead of to, and consequently didn't 4551 * need this check, but now that we relookup to instead of 4552 * from, we need this; and we shall need it forever forward 4553 * until the VOP_RENAME protocol changes, because file systems 4554 * will no doubt begin to depend on this check. 4555 */ 4556 if (tnd.ni_cnd.cn_namelen == 1 && tnd.ni_cnd.cn_nameptr[0] == '.') { 4557 error = EISDIR; 4558 goto abort1; 4559 } 4560 if (tnd.ni_cnd.cn_namelen == 2 && 4561 tnd.ni_cnd.cn_nameptr[0] == '.' && 4562 tnd.ni_cnd.cn_nameptr[1] == '.') { 4563 error = EINVAL; 4564 goto abort1; 4565 } 4566 4567 /* 4568 * Make sure the mount points match. Although we don't hold 4569 * any vnode locks, the v_mount on fdvp file system are stable. 4570 * 4571 * Unmounting another file system at an inopportune moment may 4572 * cause tdvp to disappear and change its v_mount to dead. 4573 * 4574 * So in either case different v_mount means cross-device rename. 
4575 */ 4576 KASSERT(mp != NULL); 4577 tmp = tdvp->v_mount; 4578 4579 if (mp != tmp) { 4580 error = EXDEV; 4581 goto abort1; 4582 } 4583 4584 /* 4585 * Take the vfs rename lock to avoid cross-directory screw cases. 4586 * Nothing is locked currently, so taking this lock is safe. 4587 */ 4588 error = VFS_RENAMELOCK_ENTER(mp); 4589 if (error) 4590 goto abort1; 4591 4592 /* 4593 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4594 * and nothing is locked except for the vfs rename lock. 4595 * 4596 * The next step is a little rain dance to conform to the 4597 * insane lock protocol, even though it does nothing to ward 4598 * off race conditions. 4599 * 4600 * We need tdvp and tvp to be locked. However, because we have 4601 * unlocked tdvp in order to hold no locks while we take the 4602 * vfs rename lock, tvp may be wrong here, and we can't safely 4603 * lock it even if the sensible file systems will just unlock 4604 * it straight away. Consequently, we must lock tdvp and then 4605 * relookup tvp to get it locked. 4606 * 4607 * Finally, because the VOP_RENAME protocol is brain-damaged 4608 * and various file systems insanely depend on the semantics of 4609 * this brain damage, the lookup of to must be the last lookup 4610 * before VOP_RENAME. 4611 */ 4612 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4613 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4614 if (error) 4615 goto abort2; 4616 4617 /* 4618 * Drop the old tvp and pick up the new one -- which might be 4619 * the same, but that doesn't matter to us. After this, tdvp 4620 * and tvp should both be locked. 4621 */ 4622 if (tvp != NULL) 4623 vrele(tvp); 4624 tvp = tnd.ni_vp; 4625 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4626 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 4627 4628 /* 4629 * The old do_sys_rename had various consistency checks here 4630 * involving fvp and tvp. 
fvp is bogus already here, and tvp 4631 * will become bogus soon in any sensible file system, so the 4632 * only purpose in putting these checks here is to give lip 4633 * service to these screw cases and to acknowledge that they 4634 * exist, not actually to handle them, but here you go 4635 * anyway... 4636 */ 4637 4638 /* 4639 * Acknowledge that directories and non-directories aren't 4640 * supposed to mix. 4641 */ 4642 if (tvp != NULL) { 4643 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 4644 error = ENOTDIR; 4645 goto abort3; 4646 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 4647 error = EISDIR; 4648 goto abort3; 4649 } 4650 } 4651 4652 /* 4653 * Acknowledge some random screw case, among the dozens that 4654 * might arise. 4655 */ 4656 if (fvp == tdvp) { 4657 error = EINVAL; 4658 goto abort3; 4659 } 4660 4661 /* 4662 * Acknowledge that POSIX has a wacky screw case. 4663 * 4664 * XXX Eventually the retain flag needs to be passed on to 4665 * VOP_RENAME. 4666 */ 4667 if (fvp == tvp) { 4668 if (retain) { 4669 error = 0; 4670 goto abort3; 4671 } else if (fdvp == tdvp && 4672 fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen && 4673 0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4674 fnd.ni_cnd.cn_namelen)) { 4675 error = 0; 4676 goto abort3; 4677 } 4678 } 4679 4680 /* 4681 * Make sure veriexec can screw us up. (But a race can screw 4682 * up veriexec, of course -- remember, fvp and (soon) tvp are 4683 * bogus.) 
4684 */ 4685 #if NVERIEXEC > 0 4686 { 4687 char *f1, *f2; 4688 size_t f1_len; 4689 size_t f2_len; 4690 4691 f1_len = fnd.ni_cnd.cn_namelen + 1; 4692 f1 = kmem_alloc(f1_len, KM_SLEEP); 4693 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4694 4695 f2_len = tnd.ni_cnd.cn_namelen + 1; 4696 f2 = kmem_alloc(f2_len, KM_SLEEP); 4697 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4698 4699 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4700 4701 kmem_free(f1, f1_len); 4702 kmem_free(f2, f2_len); 4703 4704 if (error) 4705 goto abort3; 4706 } 4707 #endif /* NVERIEXEC > 0 */ 4708 4709 /* 4710 * All ready. Incant the rename vop. 4711 */ 4712 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4713 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4714 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4715 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp) == LK_EXCLUSIVE); 4716 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4717 4718 /* 4719 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4720 * tdvp and tvp. But we can't assert any of that. 4721 */ 4722 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4723 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4724 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4725 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4726 4727 /* 4728 * So all we have left to do is to drop the rename lock and 4729 * destroy the pathbufs. 4730 */ 4731 VFS_RENAMELOCK_EXIT(mp); 4732 fstrans_done(mp); 4733 goto out2; 4734 4735 abort3: if (tvp != NULL && tvp != tdvp) 4736 VOP_UNLOCK(tvp); 4737 abort2: VOP_UNLOCK(tdvp); 4738 VFS_RENAMELOCK_EXIT(mp); 4739 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4740 vrele(tdvp); 4741 if (tvp != NULL) 4742 vrele(tvp); 4743 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4744 vrele(fdvp); 4745 vrele(fvp); 4746 fstrans_done(mp); 4747 out2: pathbuf_destroy(tpb); 4748 out1: pathbuf_destroy(fpb); 4749 out0: return error; 4750 } 4751 4752 /* 4753 * Make a directory file. 
4754 */ 4755 /* ARGSUSED */ 4756 int 4757 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4758 { 4759 /* { 4760 syscallarg(const char *) path; 4761 syscallarg(int) mode; 4762 } */ 4763 4764 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4765 SCARG(uap, mode), UIO_USERSPACE); 4766 } 4767 4768 int 4769 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4770 register_t *retval) 4771 { 4772 /* { 4773 syscallarg(int) fd; 4774 syscallarg(const char *) path; 4775 syscallarg(int) mode; 4776 } */ 4777 4778 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4779 SCARG(uap, mode), UIO_USERSPACE); 4780 } 4781 4782 int 4783 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4784 { 4785 4786 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, seg); 4787 } 4788 4789 static int 4790 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4791 enum uio_seg seg) 4792 { 4793 struct proc *p = curlwp->l_proc; 4794 struct vnode *vp; 4795 struct vattr vattr; 4796 int error; 4797 struct pathbuf *pb; 4798 struct nameidata nd; 4799 4800 KASSERT(l != NULL || fdat == AT_FDCWD); 4801 4802 /* XXX bollocks, should pass in a pathbuf */ 4803 error = pathbuf_maybe_copyin(path, seg, &pb); 4804 if (error) { 4805 return error; 4806 } 4807 4808 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4809 4810 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4811 pathbuf_destroy(pb); 4812 return (error); 4813 } 4814 vp = nd.ni_vp; 4815 if (vp != NULL) { 4816 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4817 if (nd.ni_dvp == vp) 4818 vrele(nd.ni_dvp); 4819 else 4820 vput(nd.ni_dvp); 4821 vrele(vp); 4822 pathbuf_destroy(pb); 4823 return (EEXIST); 4824 } 4825 vattr_null(&vattr); 4826 vattr.va_type = VDIR; 4827 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 4828 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4829 nd.ni_cnd.cn_flags |= WILLBEDIR; 4830 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4831 if (!error) 4832 vrele(nd.ni_vp); 4833 vput(nd.ni_dvp); 4834 pathbuf_destroy(pb); 4835 return (error); 4836 } 4837 4838 /* 4839 * Remove a directory file. 4840 */ 4841 /* ARGSUSED */ 4842 int 4843 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4844 { 4845 /* { 4846 syscallarg(char *) path; 4847 } */ 4848 4849 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), AT_REMOVEDIR, 4850 UIO_USERSPACE); 4851 } 4852 4853 /* 4854 * Read a block of directory entries in a file system independent format. 4855 */ 4856 int 4857 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, 4858 register_t *retval) 4859 { 4860 /* { 4861 syscallarg(int) fd; 4862 syscallarg(char *) buf; 4863 syscallarg(size_t) count; 4864 } */ 4865 file_t *fp; 4866 int error, done; 4867 4868 /* fd_getvnode() will use the descriptor for us */ 4869 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4870 return (error); 4871 if ((fp->f_flag & FREAD) == 0) { 4872 error = EBADF; 4873 goto out; 4874 } 4875 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4876 SCARG(uap, count), &done, l, 0, 0); 4877 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4878 *retval = done; 4879 out: 4880 fd_putfile(SCARG(uap, fd)); 4881 return (error); 4882 } 4883 4884 /* 4885 * Set the mode mask for creation of filesystem nodes. 4886 */ 4887 int 4888 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4889 { 4890 /* { 4891 syscallarg(mode_t) newmask; 4892 } */ 4893 4894 /* 4895 * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of 4896 * serialization with those reads is required. It's important to 4897 * return a coherent answer for the caller of umask() though, and 4898 * the atomic operation accomplishes that. 
4899 */ 4900 *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, 4901 SCARG(uap, newmask) & ALLPERMS); 4902 4903 return (0); 4904 } 4905 4906 int 4907 dorevoke(struct vnode *vp, kauth_cred_t cred) 4908 { 4909 struct vattr vattr; 4910 int error, fs_decision; 4911 4912 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4913 error = VOP_GETATTR(vp, &vattr, cred); 4914 VOP_UNLOCK(vp); 4915 if (error != 0) 4916 return error; 4917 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4918 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4919 fs_decision); 4920 if (!error) 4921 VOP_REVOKE(vp, REVOKEALL); 4922 return (error); 4923 } 4924 4925 /* 4926 * Void all references to file by ripping underlying filesystem 4927 * away from vnode. 4928 */ 4929 /* ARGSUSED */ 4930 int 4931 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, 4932 register_t *retval) 4933 { 4934 /* { 4935 syscallarg(const char *) path; 4936 } */ 4937 struct vnode *vp; 4938 int error; 4939 4940 error = namei_simple_user(SCARG(uap, path), NSM_FOLLOW_TRYEMULROOT, 4941 &vp); 4942 if (error != 0) 4943 return (error); 4944 error = dorevoke(vp, l->l_cred); 4945 vrele(vp); 4946 return (error); 4947 } 4948 4949 /* 4950 * Allocate backing store for a file, filling a hole without having to 4951 * explicitly write anything out. 
4952 */ 4953 /* ARGSUSED */ 4954 int 4955 sys_posix_fallocate(struct lwp *l, const struct sys_posix_fallocate_args *uap, 4956 register_t *retval) 4957 { 4958 /* { 4959 syscallarg(int) fd; 4960 syscallarg(off_t) pos; 4961 syscallarg(off_t) len; 4962 } */ 4963 int fd; 4964 off_t pos, len; 4965 struct file *fp; 4966 struct vnode *vp; 4967 int error; 4968 4969 fd = SCARG(uap, fd); 4970 pos = SCARG(uap, pos); 4971 len = SCARG(uap, len); 4972 4973 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 4974 *retval = EINVAL; 4975 return 0; 4976 } 4977 4978 error = fd_getvnode(fd, &fp); 4979 if (error) { 4980 *retval = error; 4981 return 0; 4982 } 4983 if ((fp->f_flag & FWRITE) == 0) { 4984 error = EBADF; 4985 goto fail; 4986 } 4987 vp = fp->f_vnode; 4988 4989 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4990 if (vp->v_type == VDIR) { 4991 error = EISDIR; 4992 } else { 4993 error = VOP_FALLOCATE(vp, pos, len); 4994 } 4995 VOP_UNLOCK(vp); 4996 4997 fail: 4998 fd_putfile(fd); 4999 *retval = error; 5000 return 0; 5001 } 5002 5003 /* 5004 * Deallocate backing store for a file, creating a hole. Also used for 5005 * invoking TRIM on disks. 
5006 */ 5007 /* ARGSUSED */ 5008 int 5009 sys_fdiscard(struct lwp *l, const struct sys_fdiscard_args *uap, 5010 register_t *retval) 5011 { 5012 /* { 5013 syscallarg(int) fd; 5014 syscallarg(off_t) pos; 5015 syscallarg(off_t) len; 5016 } */ 5017 int fd; 5018 off_t pos, len; 5019 struct file *fp; 5020 struct vnode *vp; 5021 int error; 5022 5023 fd = SCARG(uap, fd); 5024 pos = SCARG(uap, pos); 5025 len = SCARG(uap, len); 5026 5027 if (pos < 0 || len < 0 || len > OFF_T_MAX - pos) { 5028 return EINVAL; 5029 } 5030 5031 error = fd_getvnode(fd, &fp); 5032 if (error) { 5033 return error; 5034 } 5035 if ((fp->f_flag & FWRITE) == 0) { 5036 error = EBADF; 5037 goto fail; 5038 } 5039 vp = fp->f_vnode; 5040 5041 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 5042 if (vp->v_type == VDIR) { 5043 error = EISDIR; 5044 } else { 5045 error = VOP_FDISCARD(vp, pos, len); 5046 } 5047 VOP_UNLOCK(vp); 5048 5049 fail: 5050 fd_putfile(fd); 5051 return error; 5052 } 5053