1 /* $NetBSD: vfs_syscalls.c,v 1.463 2013/01/13 08:15:03 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.463 2013/01/13 08:15:03 dholland Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/quota.h> 98 #include <sys/quotactl.h> 99 #include <sys/ktrace.h> 100 #ifdef FILEASSOC 101 #include <sys/fileassoc.h> 102 #endif /* FILEASSOC */ 103 #include <sys/extattr.h> 104 #include <sys/verified_exec.h> 105 #include <sys/kauth.h> 106 #include <sys/atomic.h> 107 #include <sys/module.h> 108 #include <sys/buf.h> 109 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 #include <miscfs/specfs/specdev.h> 113 114 #include <nfs/rpcv2.h> 115 #include <nfs/nfsproto.h> 116 #include <nfs/nfs.h> 117 #include <nfs/nfs_var.h> 118 119 static int change_flags(struct vnode *, u_long, struct lwp *); 120 static int change_mode(struct vnode *, int, struct lwp *l); 121 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122 static int do_open(lwp_t *, struct vnode *, struct pathbuf *, int, int, int *); 123 static int do_sys_openat(lwp_t *, int, const char *, int, int, int *); 124 static int do_sys_mknodat(struct lwp *, int, const char *, mode_t, 125 dev_t, register_t *, enum uio_seg); 126 static int do_sys_mkdirat(struct lwp *l, int, const char *, mode_t, 127 enum uio_seg); 128 static int do_sys_mkfifoat(struct lwp *, int, const char *, mode_t); 129 
static int do_sys_chmodat(struct lwp *, int, const char *, int, int); 130 static int do_sys_chownat(struct lwp *, int, const char *, uid_t, gid_t, int); 131 static int do_sys_utimensat(struct lwp *, int, struct vnode *, 132 const char *, int, const struct timespec *, enum uio_seg); 133 static int do_sys_accessat(struct lwp *, int, const char *, int ,int); 134 static int do_sys_statat(struct lwp *, int, const char *, unsigned int, 135 struct stat *); 136 static int do_sys_symlinkat(struct lwp *, const char *, int, const char *, 137 enum uio_seg); 138 static int do_sys_linkat(struct lwp *, int, const char *, int, const char *, 139 int, register_t *); 140 static int do_sys_renameat(struct lwp *l, int, const char *, int, const char *, 141 enum uio_seg, int); 142 static int do_sys_readlinkat(struct lwp *, int, const char *, char *, 143 size_t, register_t *); 144 static int do_sys_unlinkat(struct lwp *, int, const char *, int, enum uio_seg); 145 146 static int fd_nameiat(struct lwp *, int, struct nameidata *); 147 static int fd_nameiat_simple_user(struct lwp *, int, const char *, 148 namei_simple_flags_t, struct vnode **); 149 150 151 /* 152 * This table is used to maintain compatibility with 4.3BSD 153 * and NetBSD 0.9 mount syscalls - and possibly other systems. 154 * Note, the order is important! 155 * 156 * Do not modify this table. It should only contain filesystems 157 * supported by NetBSD 0.9 and 4.3BSD. 
158 */ 159 const char * const mountcompatnames[] = { 160 NULL, /* 0 = MOUNT_NONE */ 161 MOUNT_FFS, /* 1 = MOUNT_UFS */ 162 MOUNT_NFS, /* 2 */ 163 MOUNT_MFS, /* 3 */ 164 MOUNT_MSDOS, /* 4 */ 165 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 166 MOUNT_FDESC, /* 6 */ 167 MOUNT_KERNFS, /* 7 */ 168 NULL, /* 8 = MOUNT_DEVFS */ 169 MOUNT_AFS, /* 9 */ 170 }; 171 172 const int nmountcompatnames = __arraycount(mountcompatnames); 173 174 static int 175 fd_nameiat(struct lwp *l, int fdat, struct nameidata *ndp) 176 { 177 file_t *dfp; 178 int error; 179 180 if (fdat != AT_FDCWD) { 181 if ((error = fd_getvnode(fdat, &dfp)) != 0) 182 goto out; 183 184 NDAT(ndp, dfp->f_data); 185 } 186 187 error = namei(ndp); 188 189 if (fdat != AT_FDCWD) 190 fd_putfile(fdat); 191 out: 192 return error; 193 } 194 195 static int 196 fd_nameiat_simple_user(struct lwp *l, int fdat, const char *path, 197 namei_simple_flags_t sflags, struct vnode **vp_ret) 198 { 199 file_t *dfp; 200 struct vnode *dvp; 201 int error; 202 203 if (fdat != AT_FDCWD) { 204 if ((error = fd_getvnode(fdat, &dfp)) != 0) 205 goto out; 206 207 dvp = dfp->f_data; 208 } else { 209 dvp = NULL; 210 } 211 212 error = nameiat_simple_user(dvp, path, sflags, vp_ret); 213 214 if (fdat != AT_FDCWD) 215 fd_putfile(fdat); 216 out: 217 return error; 218 } 219 220 static int 221 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 222 { 223 int error; 224 225 fp->f_flag = flags & FMASK; 226 fp->f_type = DTYPE_VNODE; 227 fp->f_ops = &vnops; 228 fp->f_data = vp; 229 230 if (flags & (O_EXLOCK | O_SHLOCK)) { 231 struct flock lf; 232 int type; 233 234 lf.l_whence = SEEK_SET; 235 lf.l_start = 0; 236 lf.l_len = 0; 237 if (flags & O_EXLOCK) 238 lf.l_type = F_WRLCK; 239 else 240 lf.l_type = F_RDLCK; 241 type = F_FLOCK; 242 if ((flags & FNONBLOCK) == 0) 243 type |= F_WAIT; 244 VOP_UNLOCK(vp); 245 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 246 if (error) { 247 (void) vn_close(vp, fp->f_flag, fp->f_cred); 248 fd_abort(l->l_proc, fp, 
indx); 249 return error; 250 } 251 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 252 atomic_or_uint(&fp->f_flag, FHASLOCK); 253 } 254 if (flags & O_CLOEXEC) 255 fd_set_exclose(l, indx, true); 256 return 0; 257 } 258 259 static int 260 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 261 void *data, size_t *data_len) 262 { 263 struct mount *mp; 264 int error = 0, saved_flags; 265 266 mp = vp->v_mount; 267 saved_flags = mp->mnt_flag; 268 269 /* We can operate only on VV_ROOT nodes. */ 270 if ((vp->v_vflag & VV_ROOT) == 0) { 271 error = EINVAL; 272 goto out; 273 } 274 275 /* 276 * We only allow the filesystem to be reloaded if it 277 * is currently mounted read-only. Additionally, we 278 * prevent read-write to read-only downgrades. 279 */ 280 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 281 (mp->mnt_flag & MNT_RDONLY) == 0 && 282 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 283 error = EOPNOTSUPP; /* Needs translation */ 284 goto out; 285 } 286 287 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 288 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 289 if (error) 290 goto out; 291 292 if (vfs_busy(mp, NULL)) { 293 error = EPERM; 294 goto out; 295 } 296 297 mutex_enter(&mp->mnt_updating); 298 299 mp->mnt_flag &= ~MNT_OP_FLAGS; 300 mp->mnt_flag |= flags & MNT_OP_FLAGS; 301 302 /* 303 * Set the mount level flags. 304 */ 305 if (flags & MNT_RDONLY) 306 mp->mnt_flag |= MNT_RDONLY; 307 else if (mp->mnt_flag & MNT_RDONLY) 308 mp->mnt_iflag |= IMNT_WANTRDWR; 309 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 310 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 311 error = VFS_MOUNT(mp, path, data, data_len); 312 313 if (error && data != NULL) { 314 int error2; 315 316 /* 317 * Update failed; let's try and see if it was an 318 * export request. For compat with 3.0 and earlier. 
319 */ 320 error2 = vfs_hooks_reexport(mp, path, data); 321 322 /* 323 * Only update error code if the export request was 324 * understood but some problem occurred while 325 * processing it. 326 */ 327 if (error2 != EJUSTRETURN) 328 error = error2; 329 } 330 331 if (mp->mnt_iflag & IMNT_WANTRDWR) 332 mp->mnt_flag &= ~MNT_RDONLY; 333 if (error) 334 mp->mnt_flag = saved_flags; 335 mp->mnt_flag &= ~MNT_OP_FLAGS; 336 mp->mnt_iflag &= ~IMNT_WANTRDWR; 337 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 338 if (mp->mnt_syncer == NULL) 339 error = vfs_allocate_syncvnode(mp); 340 } else { 341 if (mp->mnt_syncer != NULL) 342 vfs_deallocate_syncvnode(mp); 343 } 344 mutex_exit(&mp->mnt_updating); 345 vfs_unbusy(mp, false, NULL); 346 347 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 348 (flags & MNT_EXTATTR)) { 349 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 350 NULL, 0, NULL) != 0) { 351 printf("%s: failed to start extattr, error = %d", 352 mp->mnt_stat.f_mntonname, error); 353 mp->mnt_flag &= ~MNT_EXTATTR; 354 } 355 } 356 357 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 358 !(flags & MNT_EXTATTR)) { 359 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 360 NULL, 0, NULL) != 0) { 361 printf("%s: failed to stop extattr, error = %d", 362 mp->mnt_stat.f_mntonname, error); 363 mp->mnt_flag |= MNT_RDONLY; 364 } 365 } 366 out: 367 return (error); 368 } 369 370 static int 371 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 372 { 373 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 374 int error; 375 376 /* Copy file-system type from userspace. */ 377 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 378 if (error) { 379 /* 380 * Historically, filesystem types were identified by numbers. 381 * If we get an integer for the filesystem type instead of a 382 * string, we check to see if it matches one of the historic 383 * filesystem types. 
384 */ 385 u_long fsindex = (u_long)fstype; 386 if (fsindex >= nmountcompatnames || 387 mountcompatnames[fsindex] == NULL) 388 return ENODEV; 389 strlcpy(fstypename, mountcompatnames[fsindex], 390 sizeof(fstypename)); 391 } 392 393 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 394 if (strcmp(fstypename, "ufs") == 0) 395 fstypename[0] = 'f'; 396 397 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 398 return 0; 399 400 /* If we can autoload a vfs module, try again */ 401 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 402 403 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 404 return 0; 405 406 return ENODEV; 407 } 408 409 static int 410 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 411 void *data, size_t *data_len) 412 { 413 struct mount *mp; 414 int error; 415 416 /* If MNT_GETARGS is specified, it should be the only flag. */ 417 if (flags & ~MNT_GETARGS) 418 return EINVAL; 419 420 mp = vp->v_mount; 421 422 /* XXX: probably some notion of "can see" here if we want isolation. 
*/ 423 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 424 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 425 if (error) 426 return error; 427 428 if ((vp->v_vflag & VV_ROOT) == 0) 429 return EINVAL; 430 431 if (vfs_busy(mp, NULL)) 432 return EPERM; 433 434 mutex_enter(&mp->mnt_updating); 435 mp->mnt_flag &= ~MNT_OP_FLAGS; 436 mp->mnt_flag |= MNT_GETARGS; 437 error = VFS_MOUNT(mp, path, data, data_len); 438 mp->mnt_flag &= ~MNT_OP_FLAGS; 439 mutex_exit(&mp->mnt_updating); 440 441 vfs_unbusy(mp, false, NULL); 442 return (error); 443 } 444 445 int 446 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 447 { 448 /* { 449 syscallarg(const char *) type; 450 syscallarg(const char *) path; 451 syscallarg(int) flags; 452 syscallarg(void *) data; 453 syscallarg(size_t) data_len; 454 } */ 455 456 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 457 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 458 SCARG(uap, data_len), retval); 459 } 460 461 int 462 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 463 const char *path, int flags, void *data, enum uio_seg data_seg, 464 size_t data_len, register_t *retval) 465 { 466 struct vnode *vp; 467 void *data_buf = data; 468 bool vfsopsrele = false; 469 int error; 470 471 /* XXX: The calling convention of this routine is totally bizarre */ 472 if (vfsops) 473 vfsopsrele = true; 474 475 /* 476 * Get vnode to be covered 477 */ 478 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 479 if (error != 0) { 480 vp = NULL; 481 goto done; 482 } 483 484 if (vfsops == NULL) { 485 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 486 vfsops = vp->v_mount->mnt_op; 487 } else { 488 /* 'type' is userspace */ 489 error = mount_get_vfsops(type, &vfsops); 490 if (error != 0) 491 goto done; 492 vfsopsrele = true; 493 } 494 } 495 496 if (data != NULL && data_seg == UIO_USERSPACE) { 497 if (data_len == 0) { 498 /* No length supplied, use default for filesystem */ 499 
data_len = vfsops->vfs_min_mount_data; 500 if (data_len > VFS_MAX_MOUNT_DATA) { 501 error = EINVAL; 502 goto done; 503 } 504 /* 505 * Hopefully a longer buffer won't make copyin() fail. 506 * For compatibility with 3.0 and earlier. 507 */ 508 if (flags & MNT_UPDATE 509 && data_len < sizeof (struct mnt_export_args30)) 510 data_len = sizeof (struct mnt_export_args30); 511 } 512 data_buf = kmem_alloc(data_len, KM_SLEEP); 513 514 /* NFS needs the buffer even for mnt_getargs .... */ 515 error = copyin(data, data_buf, data_len); 516 if (error != 0) 517 goto done; 518 } 519 520 if (flags & MNT_GETARGS) { 521 if (data_len == 0) { 522 error = EINVAL; 523 goto done; 524 } 525 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 526 if (error != 0) 527 goto done; 528 if (data_seg == UIO_USERSPACE) 529 error = copyout(data_buf, data, data_len); 530 *retval = data_len; 531 } else if (flags & MNT_UPDATE) { 532 error = mount_update(l, vp, path, flags, data_buf, &data_len); 533 } else { 534 /* Locking is handled internally in mount_domount(). */ 535 KASSERT(vfsopsrele == true); 536 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 537 &data_len); 538 vfsopsrele = false; 539 } 540 541 done: 542 if (vfsopsrele) 543 vfs_delref(vfsops); 544 if (vp != NULL) { 545 vrele(vp); 546 } 547 if (data_buf != data) 548 kmem_free(data_buf, data_len); 549 return (error); 550 } 551 552 /* 553 * Unmount a file system. 554 * 555 * Note: unmount takes a path to the vnode mounted on as argument, 556 * not special file (as before). 
557 */ 558 /* ARGSUSED */ 559 int 560 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 561 { 562 /* { 563 syscallarg(const char *) path; 564 syscallarg(int) flags; 565 } */ 566 struct vnode *vp; 567 struct mount *mp; 568 int error; 569 struct pathbuf *pb; 570 struct nameidata nd; 571 572 error = pathbuf_copyin(SCARG(uap, path), &pb); 573 if (error) { 574 return error; 575 } 576 577 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 578 if ((error = namei(&nd)) != 0) { 579 pathbuf_destroy(pb); 580 return error; 581 } 582 vp = nd.ni_vp; 583 pathbuf_destroy(pb); 584 585 mp = vp->v_mount; 586 atomic_inc_uint(&mp->mnt_refcnt); 587 VOP_UNLOCK(vp); 588 589 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 590 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 591 if (error) { 592 vrele(vp); 593 vfs_destroy(mp); 594 return (error); 595 } 596 597 /* 598 * Don't allow unmounting the root file system. 599 */ 600 if (mp->mnt_flag & MNT_ROOTFS) { 601 vrele(vp); 602 vfs_destroy(mp); 603 return (EINVAL); 604 } 605 606 /* 607 * Must be the root of the filesystem 608 */ 609 if ((vp->v_vflag & VV_ROOT) == 0) { 610 vrele(vp); 611 vfs_destroy(mp); 612 return (EINVAL); 613 } 614 615 vrele(vp); 616 error = dounmount(mp, SCARG(uap, flags), l); 617 vfs_destroy(mp); 618 return error; 619 } 620 621 /* 622 * Sync each mounted filesystem. 
623 */ 624 #ifdef DEBUG 625 int syncprt = 0; 626 struct ctldebug debug0 = { "syncprt", &syncprt }; 627 #endif 628 629 void 630 do_sys_sync(struct lwp *l) 631 { 632 struct mount *mp, *nmp; 633 int asyncflag; 634 635 mutex_enter(&mountlist_lock); 636 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 637 mp = nmp) { 638 if (vfs_busy(mp, &nmp)) { 639 continue; 640 } 641 mutex_enter(&mp->mnt_updating); 642 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 643 asyncflag = mp->mnt_flag & MNT_ASYNC; 644 mp->mnt_flag &= ~MNT_ASYNC; 645 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 646 if (asyncflag) 647 mp->mnt_flag |= MNT_ASYNC; 648 } 649 mutex_exit(&mp->mnt_updating); 650 vfs_unbusy(mp, false, &nmp); 651 } 652 mutex_exit(&mountlist_lock); 653 #ifdef DEBUG 654 if (syncprt) 655 vfs_bufstats(); 656 #endif /* DEBUG */ 657 } 658 659 /* ARGSUSED */ 660 int 661 sys_sync(struct lwp *l, const void *v, register_t *retval) 662 { 663 do_sys_sync(l); 664 return (0); 665 } 666 667 668 /* 669 * Access or change filesystem quotas. 
670 * 671 * (this is really 14 different calls bundled into one) 672 */ 673 674 static int 675 do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 676 { 677 struct quotastat info_k; 678 int error; 679 680 /* ensure any padding bytes are cleared */ 681 memset(&info_k, 0, sizeof(info_k)); 682 683 error = vfs_quotactl_stat(mp, &info_k); 684 if (error) { 685 return error; 686 } 687 688 return copyout(&info_k, info_u, sizeof(info_k)); 689 } 690 691 static int 692 do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 693 struct quotaidtypestat *info_u) 694 { 695 struct quotaidtypestat info_k; 696 int error; 697 698 /* ensure any padding bytes are cleared */ 699 memset(&info_k, 0, sizeof(info_k)); 700 701 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 702 if (error) { 703 return error; 704 } 705 706 return copyout(&info_k, info_u, sizeof(info_k)); 707 } 708 709 static int 710 do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 711 struct quotaobjtypestat *info_u) 712 { 713 struct quotaobjtypestat info_k; 714 int error; 715 716 /* ensure any padding bytes are cleared */ 717 memset(&info_k, 0, sizeof(info_k)); 718 719 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 720 if (error) { 721 return error; 722 } 723 724 return copyout(&info_k, info_u, sizeof(info_k)); 725 } 726 727 static int 728 do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 729 struct quotaval *val_u) 730 { 731 struct quotakey key_k; 732 struct quotaval val_k; 733 int error; 734 735 /* ensure any padding bytes are cleared */ 736 memset(&val_k, 0, sizeof(val_k)); 737 738 error = copyin(key_u, &key_k, sizeof(key_k)); 739 if (error) { 740 return error; 741 } 742 743 error = vfs_quotactl_get(mp, &key_k, &val_k); 744 if (error) { 745 return error; 746 } 747 748 return copyout(&val_k, val_u, sizeof(val_k)); 749 } 750 751 static int 752 do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 753 const struct quotaval *val_u) 754 { 755 struct 
quotakey key_k; 756 struct quotaval val_k; 757 int error; 758 759 error = copyin(key_u, &key_k, sizeof(key_k)); 760 if (error) { 761 return error; 762 } 763 764 error = copyin(val_u, &val_k, sizeof(val_k)); 765 if (error) { 766 return error; 767 } 768 769 return vfs_quotactl_put(mp, &key_k, &val_k); 770 } 771 772 static int 773 do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u) 774 { 775 struct quotakey key_k; 776 int error; 777 778 error = copyin(key_u, &key_k, sizeof(key_k)); 779 if (error) { 780 return error; 781 } 782 783 return vfs_quotactl_delete(mp, &key_k); 784 } 785 786 static int 787 do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 788 { 789 struct quotakcursor cursor_k; 790 int error; 791 792 /* ensure any padding bytes are cleared */ 793 memset(&cursor_k, 0, sizeof(cursor_k)); 794 795 error = vfs_quotactl_cursoropen(mp, &cursor_k); 796 if (error) { 797 return error; 798 } 799 800 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 801 } 802 803 static int 804 do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 805 { 806 struct quotakcursor cursor_k; 807 int error; 808 809 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 810 if (error) { 811 return error; 812 } 813 814 return vfs_quotactl_cursorclose(mp, &cursor_k); 815 } 816 817 static int 818 do_sys_quotactl_cursorskipidtype(struct mount *mp, 819 struct quotakcursor *cursor_u, int idtype) 820 { 821 struct quotakcursor cursor_k; 822 int error; 823 824 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 825 if (error) { 826 return error; 827 } 828 829 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 830 if (error) { 831 return error; 832 } 833 834 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 835 } 836 837 static int 838 do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 839 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 840 unsigned *ret_u) 841 { 842 
#define CGET_STACK_MAX 8 843 struct quotakcursor cursor_k; 844 struct quotakey stackkeys[CGET_STACK_MAX]; 845 struct quotaval stackvals[CGET_STACK_MAX]; 846 struct quotakey *keys_k; 847 struct quotaval *vals_k; 848 unsigned ret_k; 849 int error; 850 851 if (maxnum > 128) { 852 maxnum = 128; 853 } 854 855 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 856 if (error) { 857 return error; 858 } 859 860 if (maxnum <= CGET_STACK_MAX) { 861 keys_k = stackkeys; 862 vals_k = stackvals; 863 /* ensure any padding bytes are cleared */ 864 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 865 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 866 } else { 867 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 868 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 869 } 870 871 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 872 &ret_k); 873 if (error) { 874 goto fail; 875 } 876 877 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 878 if (error) { 879 goto fail; 880 } 881 882 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 883 if (error) { 884 goto fail; 885 } 886 887 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 888 if (error) { 889 goto fail; 890 } 891 892 /* do last to maximize the chance of being able to recover a failure */ 893 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 894 895 fail: 896 if (keys_k != stackkeys) { 897 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 898 } 899 if (vals_k != stackvals) { 900 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 901 } 902 return error; 903 } 904 905 static int 906 do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 907 int *ret_u) 908 { 909 struct quotakcursor cursor_k; 910 int ret_k; 911 int error; 912 913 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 914 if (error) { 915 return error; 916 } 917 918 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 919 if (error) { 920 return error; 921 } 922 923 error = 
copyout(&ret_k, ret_u, sizeof(ret_k)); 924 if (error) { 925 return error; 926 } 927 928 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 929 } 930 931 static int 932 do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 933 { 934 struct quotakcursor cursor_k; 935 int error; 936 937 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 938 if (error) { 939 return error; 940 } 941 942 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 943 if (error) { 944 return error; 945 } 946 947 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 948 } 949 950 static int 951 do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 952 { 953 char *path_k; 954 int error; 955 956 /* XXX this should probably be a struct pathbuf */ 957 path_k = PNBUF_GET(); 958 error = copyin(path_u, path_k, PATH_MAX); 959 if (error) { 960 PNBUF_PUT(path_k); 961 return error; 962 } 963 964 error = vfs_quotactl_quotaon(mp, idtype, path_k); 965 966 PNBUF_PUT(path_k); 967 return error; 968 } 969 970 static int 971 do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 972 { 973 return vfs_quotactl_quotaoff(mp, idtype); 974 } 975 976 int 977 do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 978 { 979 struct mount *mp; 980 struct vnode *vp; 981 int error; 982 983 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 984 if (error != 0) 985 return (error); 986 mp = vp->v_mount; 987 988 switch (args->qc_op) { 989 case QUOTACTL_STAT: 990 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 991 break; 992 case QUOTACTL_IDTYPESTAT: 993 error = do_sys_quotactl_idtypestat(mp, 994 args->u.idtypestat.qc_idtype, 995 args->u.idtypestat.qc_info); 996 break; 997 case QUOTACTL_OBJTYPESTAT: 998 error = do_sys_quotactl_objtypestat(mp, 999 args->u.objtypestat.qc_objtype, 1000 args->u.objtypestat.qc_info); 1001 break; 1002 case QUOTACTL_GET: 1003 error = do_sys_quotactl_get(mp, 1004 args->u.get.qc_key, 1005 args->u.get.qc_val); 1006 break; 
1007 case QUOTACTL_PUT: 1008 error = do_sys_quotactl_put(mp, 1009 args->u.put.qc_key, 1010 args->u.put.qc_val); 1011 break; 1012 case QUOTACTL_DELETE: 1013 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key); 1014 break; 1015 case QUOTACTL_CURSOROPEN: 1016 error = do_sys_quotactl_cursoropen(mp, 1017 args->u.cursoropen.qc_cursor); 1018 break; 1019 case QUOTACTL_CURSORCLOSE: 1020 error = do_sys_quotactl_cursorclose(mp, 1021 args->u.cursorclose.qc_cursor); 1022 break; 1023 case QUOTACTL_CURSORSKIPIDTYPE: 1024 error = do_sys_quotactl_cursorskipidtype(mp, 1025 args->u.cursorskipidtype.qc_cursor, 1026 args->u.cursorskipidtype.qc_idtype); 1027 break; 1028 case QUOTACTL_CURSORGET: 1029 error = do_sys_quotactl_cursorget(mp, 1030 args->u.cursorget.qc_cursor, 1031 args->u.cursorget.qc_keys, 1032 args->u.cursorget.qc_vals, 1033 args->u.cursorget.qc_maxnum, 1034 args->u.cursorget.qc_ret); 1035 break; 1036 case QUOTACTL_CURSORATEND: 1037 error = do_sys_quotactl_cursoratend(mp, 1038 args->u.cursoratend.qc_cursor, 1039 args->u.cursoratend.qc_ret); 1040 break; 1041 case QUOTACTL_CURSORREWIND: 1042 error = do_sys_quotactl_cursorrewind(mp, 1043 args->u.cursorrewind.qc_cursor); 1044 break; 1045 case QUOTACTL_QUOTAON: 1046 error = do_sys_quotactl_quotaon(mp, 1047 args->u.quotaon.qc_idtype, 1048 args->u.quotaon.qc_quotafile); 1049 break; 1050 case QUOTACTL_QUOTAOFF: 1051 error = do_sys_quotactl_quotaoff(mp, 1052 args->u.quotaoff.qc_idtype); 1053 break; 1054 default: 1055 error = EINVAL; 1056 break; 1057 } 1058 1059 vrele(vp); 1060 return error; 1061 } 1062 1063 /* ARGSUSED */ 1064 int 1065 sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1066 register_t *retval) 1067 { 1068 /* { 1069 syscallarg(const char *) path; 1070 syscallarg(struct quotactl_args *) args; 1071 } */ 1072 struct quotactl_args args; 1073 int error; 1074 1075 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1076 if (error) { 1077 return error; 1078 } 1079 1080 return 
do_sys_quotactl(SCARG(uap, path), &args); 1081 } 1082 1083 int 1084 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1085 int root) 1086 { 1087 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 1088 int error = 0; 1089 1090 /* 1091 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1092 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1093 * overrides MNT_NOWAIT. 1094 */ 1095 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1096 (flags != MNT_WAIT && flags != 0)) { 1097 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1098 goto done; 1099 } 1100 1101 /* Get the filesystem stats now */ 1102 memset(sp, 0, sizeof(*sp)); 1103 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1104 return error; 1105 } 1106 1107 if (cwdi->cwdi_rdir == NULL) 1108 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1109 done: 1110 if (cwdi->cwdi_rdir != NULL) { 1111 size_t len; 1112 char *bp; 1113 char c; 1114 char *path = PNBUF_GET(); 1115 1116 bp = path + MAXPATHLEN; 1117 *--bp = '\0'; 1118 rw_enter(&cwdi->cwdi_lock, RW_READER); 1119 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1120 MAXPATHLEN / 2, 0, l); 1121 rw_exit(&cwdi->cwdi_lock); 1122 if (error) { 1123 PNBUF_PUT(path); 1124 return error; 1125 } 1126 len = strlen(bp); 1127 if (len != 1) { 1128 /* 1129 * for mount points that are below our root, we can see 1130 * them, so we fix up the pathname and return them. The 1131 * rest we cannot see, so we don't allow viewing the 1132 * data. 1133 */ 1134 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1135 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1136 (void)strlcpy(sp->f_mntonname, 1137 c == '\0' ? "/" : &sp->f_mntonname[len], 1138 sizeof(sp->f_mntonname)); 1139 } else { 1140 if (root) 1141 (void)strlcpy(sp->f_mntonname, "/", 1142 sizeof(sp->f_mntonname)); 1143 else 1144 error = EPERM; 1145 } 1146 } 1147 PNBUF_PUT(path); 1148 } 1149 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1150 return error; 1151 } 1152 1153 /* 1154 * Get filesystem statistics by path. 
1155 */ 1156 int 1157 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1158 { 1159 struct mount *mp; 1160 int error; 1161 struct vnode *vp; 1162 1163 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1164 if (error != 0) 1165 return error; 1166 mp = vp->v_mount; 1167 error = dostatvfs(mp, sb, l, flags, 1); 1168 vrele(vp); 1169 return error; 1170 } 1171 1172 /* ARGSUSED */ 1173 int 1174 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1175 { 1176 /* { 1177 syscallarg(const char *) path; 1178 syscallarg(struct statvfs *) buf; 1179 syscallarg(int) flags; 1180 } */ 1181 struct statvfs *sb; 1182 int error; 1183 1184 sb = STATVFSBUF_GET(); 1185 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1186 if (error == 0) 1187 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1188 STATVFSBUF_PUT(sb); 1189 return error; 1190 } 1191 1192 /* 1193 * Get filesystem statistics by fd. 1194 */ 1195 int 1196 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1197 { 1198 file_t *fp; 1199 struct mount *mp; 1200 int error; 1201 1202 /* fd_getvnode() will use the descriptor for us */ 1203 if ((error = fd_getvnode(fd, &fp)) != 0) 1204 return (error); 1205 mp = ((struct vnode *)fp->f_data)->v_mount; 1206 error = dostatvfs(mp, sb, curlwp, flags, 1); 1207 fd_putfile(fd); 1208 return error; 1209 } 1210 1211 /* ARGSUSED */ 1212 int 1213 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1214 { 1215 /* { 1216 syscallarg(int) fd; 1217 syscallarg(struct statvfs *) buf; 1218 syscallarg(int) flags; 1219 } */ 1220 struct statvfs *sb; 1221 int error; 1222 1223 sb = STATVFSBUF_GET(); 1224 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1225 if (error == 0) 1226 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1227 STATVFSBUF_PUT(sb); 1228 return error; 1229 } 1230 1231 1232 /* 1233 * Get statistics on all filesystems. 
1234 */ 1235 int 1236 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1237 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1238 register_t *retval) 1239 { 1240 int root = 0; 1241 struct proc *p = l->l_proc; 1242 struct mount *mp, *nmp; 1243 struct statvfs *sb; 1244 size_t count, maxcount; 1245 int error = 0; 1246 1247 sb = STATVFSBUF_GET(); 1248 maxcount = bufsize / entry_sz; 1249 mutex_enter(&mountlist_lock); 1250 count = 0; 1251 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1252 mp = nmp) { 1253 if (vfs_busy(mp, &nmp)) { 1254 continue; 1255 } 1256 if (sfsp && count < maxcount) { 1257 error = dostatvfs(mp, sb, l, flags, 0); 1258 if (error) { 1259 vfs_unbusy(mp, false, &nmp); 1260 error = 0; 1261 continue; 1262 } 1263 error = copyfn(sb, sfsp, entry_sz); 1264 if (error) { 1265 vfs_unbusy(mp, false, NULL); 1266 goto out; 1267 } 1268 sfsp = (char *)sfsp + entry_sz; 1269 root |= strcmp(sb->f_mntonname, "/") == 0; 1270 } 1271 count++; 1272 vfs_unbusy(mp, false, &nmp); 1273 } 1274 mutex_exit(&mountlist_lock); 1275 1276 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1277 /* 1278 * fake a root entry 1279 */ 1280 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1281 sb, l, flags, 1); 1282 if (error != 0) 1283 goto out; 1284 if (sfsp) { 1285 error = copyfn(sb, sfsp, entry_sz); 1286 if (error != 0) 1287 goto out; 1288 } 1289 count++; 1290 } 1291 if (sfsp && count > maxcount) 1292 *retval = maxcount; 1293 else 1294 *retval = count; 1295 out: 1296 STATVFSBUF_PUT(sb); 1297 return error; 1298 } 1299 1300 int 1301 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1302 { 1303 /* { 1304 syscallarg(struct statvfs *) buf; 1305 syscallarg(size_t) bufsize; 1306 syscallarg(int) flags; 1307 } */ 1308 1309 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1310 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1311 } 1312 1313 /* 1314 * Change current working directory to a 
given file descriptor.
 */
/* ARGSUSED */
int
sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) fd;
	} */
	struct cwdinfo *cwdi;
	struct vnode *vp, *tdp;
	struct mount *mp;
	file_t *fp;
	int error;
	int fd = SCARG(uap, fd);

	/* fd_getvnode() will use the descriptor for us */
	error = fd_getvnode(fd, &fp);
	if (error != 0)
		return (error);
	vp = fp->f_data;

	/* Take our own reference and lock the vnode for the checks below. */
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	error = (vp->v_type != VDIR) ? ENOTDIR :
	    VOP_ACCESS(vp, VEXEC, l->l_cred);
	if (error) {
		vput(vp);
		goto out;
	}

	/* If something is mounted here, step down to the root of that mount. */
	for (;;) {
		mp = vp->v_mountedhere;
		if (mp == NULL)
			break;

		error = vfs_busy(mp, NULL);
		vput(vp);
		if (error != 0)
			goto out;
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp, false, NULL);
		if (error)
			goto out;
		vp = tdp;
	}
	VOP_UNLOCK(vp);

	/*
	 * Disallow changing to a directory not under the process's
	 * current root directory (if there is one).
	 */
	cwdi = l->l_proc->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_WRITER);
	if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
		vrele(vp);
		error = EPERM;	/* operation not permitted */
	} else {
		/* Swap the new directory in for the old one. */
		vrele(cwdi->cwdi_cdir);
		cwdi->cwdi_cdir = vp;
	}
	rw_exit(&cwdi->cwdi_lock);

 out:
	fd_putfile(fd);
	return (error);
}

/*
 * Change this process's notion of the root directory to a given file
 * descriptor.
1382 */ 1383 int 1384 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1385 { 1386 struct proc *p = l->l_proc; 1387 struct vnode *vp; 1388 file_t *fp; 1389 int error, fd = SCARG(uap, fd); 1390 1391 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1392 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1393 return error; 1394 /* fd_getvnode() will use the descriptor for us */ 1395 if ((error = fd_getvnode(fd, &fp)) != 0) 1396 return error; 1397 vp = fp->f_data; 1398 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1399 if (vp->v_type != VDIR) 1400 error = ENOTDIR; 1401 else 1402 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1403 VOP_UNLOCK(vp); 1404 if (error) 1405 goto out; 1406 vref(vp); 1407 1408 change_root(p->p_cwdi, vp, l); 1409 1410 out: 1411 fd_putfile(fd); 1412 return (error); 1413 } 1414 1415 /* 1416 * Change current working directory (``.''). 1417 */ 1418 /* ARGSUSED */ 1419 int 1420 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1421 { 1422 /* { 1423 syscallarg(const char *) path; 1424 } */ 1425 struct proc *p = l->l_proc; 1426 struct cwdinfo *cwdi; 1427 int error; 1428 struct vnode *vp; 1429 1430 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1431 &vp, l)) != 0) 1432 return (error); 1433 cwdi = p->p_cwdi; 1434 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1435 vrele(cwdi->cwdi_cdir); 1436 cwdi->cwdi_cdir = vp; 1437 rw_exit(&cwdi->cwdi_lock); 1438 return (0); 1439 } 1440 1441 /* 1442 * Change notion of root (``/'') directory. 
1443 */ 1444 /* ARGSUSED */ 1445 int 1446 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1447 { 1448 /* { 1449 syscallarg(const char *) path; 1450 } */ 1451 struct proc *p = l->l_proc; 1452 int error; 1453 struct vnode *vp; 1454 1455 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1456 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1457 return (error); 1458 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1459 &vp, l)) != 0) 1460 return (error); 1461 1462 change_root(p->p_cwdi, vp, l); 1463 1464 return (0); 1465 } 1466 1467 /* 1468 * Common routine for chroot and fchroot. 1469 * NB: callers need to properly authorize the change root operation. 1470 */ 1471 void 1472 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1473 { 1474 struct proc *p = l->l_proc; 1475 kauth_cred_t ncred; 1476 1477 ncred = kauth_cred_alloc(); 1478 1479 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1480 if (cwdi->cwdi_rdir != NULL) 1481 vrele(cwdi->cwdi_rdir); 1482 cwdi->cwdi_rdir = vp; 1483 1484 /* 1485 * Prevent escaping from chroot by putting the root under 1486 * the working directory. Silently chdir to / if we aren't 1487 * already there. 1488 */ 1489 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1490 /* 1491 * XXX would be more failsafe to change directory to a 1492 * deadfs node here instead 1493 */ 1494 vrele(cwdi->cwdi_cdir); 1495 vref(vp); 1496 cwdi->cwdi_cdir = vp; 1497 } 1498 rw_exit(&cwdi->cwdi_lock); 1499 1500 /* Get a write lock on the process credential. */ 1501 proc_crmod_enter(); 1502 1503 kauth_cred_clone(p->p_cred, ncred); 1504 kauth_proc_chroot(ncred, p->p_cwdi); 1505 1506 /* Broadcast our credentials to the process and other LWPs. */ 1507 proc_crmod_leave(ncred, p->p_cred, true); 1508 } 1509 1510 /* 1511 * Common routine for chroot and chdir. 
1512 * XXX "where" should be enum uio_seg 1513 */ 1514 int 1515 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1516 { 1517 struct pathbuf *pb; 1518 struct nameidata nd; 1519 int error; 1520 1521 error = pathbuf_maybe_copyin(path, where, &pb); 1522 if (error) { 1523 return error; 1524 } 1525 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1526 if ((error = namei(&nd)) != 0) { 1527 pathbuf_destroy(pb); 1528 return error; 1529 } 1530 *vpp = nd.ni_vp; 1531 pathbuf_destroy(pb); 1532 1533 if ((*vpp)->v_type != VDIR) 1534 error = ENOTDIR; 1535 else 1536 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1537 1538 if (error) 1539 vput(*vpp); 1540 else 1541 VOP_UNLOCK(*vpp); 1542 return (error); 1543 } 1544 1545 /* 1546 * Internals of sys_open - path has already been converted into a pathbuf 1547 * (so we can easily reuse this function from other parts of the kernel, 1548 * like posix_spawn post-processing). 1549 */ 1550 static int 1551 do_open(lwp_t *l, struct vnode *dvp, struct pathbuf *pb, int open_flags, 1552 int open_mode, int *fd) 1553 { 1554 struct proc *p = l->l_proc; 1555 struct cwdinfo *cwdi = p->p_cwdi; 1556 file_t *fp; 1557 struct vnode *vp; 1558 int flags, cmode; 1559 int indx, error; 1560 struct nameidata nd; 1561 1562 if (open_flags & O_SEARCH) { 1563 open_flags &= ~(int)O_SEARCH; 1564 } 1565 1566 flags = FFLAGS(open_flags); 1567 if ((flags & (FREAD | FWRITE)) == 0) 1568 return EINVAL; 1569 1570 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1571 return error; 1572 } 1573 1574 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1575 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1576 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1577 if (dvp != NULL) 1578 NDAT(&nd, dvp); 1579 1580 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1581 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1582 fd_abort(p, fp, indx); 1583 if ((error == EDUPFD || error == EMOVEFD) && 1584 l->l_dupfd >= 0 && /* XXX from fdopen */ 1585 (error = 1586 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1587 *fd = indx; 1588 return 0; 1589 } 1590 if (error == ERESTART) 1591 error = EINTR; 1592 return error; 1593 } 1594 1595 l->l_dupfd = 0; 1596 vp = nd.ni_vp; 1597 1598 if ((error = open_setfp(l, fp, vp, indx, flags))) 1599 return error; 1600 1601 VOP_UNLOCK(vp); 1602 *fd = indx; 1603 fd_affix(p, fp, indx); 1604 return 0; 1605 } 1606 1607 int 1608 fd_open(const char *path, int open_flags, int open_mode, int *fd) 1609 { 1610 struct pathbuf *pb; 1611 int error, oflags; 1612 1613 oflags = FFLAGS(open_flags); 1614 if ((oflags & (FREAD | FWRITE)) == 0) 1615 return EINVAL; 1616 1617 pb = pathbuf_create(path); 1618 if (pb == NULL) 1619 return ENOMEM; 1620 1621 error = do_open(curlwp, NULL, pb, open_flags, open_mode, fd); 1622 pathbuf_destroy(pb); 1623 1624 return error; 1625 } 1626 1627 /* 1628 * Check permissions, allocate an open file structure, 1629 * and call the device open routine if any. 
1630 */ 1631 static int 1632 do_sys_openat(lwp_t *l, int fdat, const char *path, int flags, 1633 int mode, int *fd) 1634 { 1635 file_t *dfp = NULL; 1636 struct vnode *dvp = NULL; 1637 struct pathbuf *pb; 1638 int error; 1639 1640 error = pathbuf_copyin(path, &pb); 1641 if (error) 1642 return error; 1643 1644 if (fdat != AT_FDCWD) { 1645 /* fd_getvnode() will use the descriptor for us */ 1646 if ((error = fd_getvnode(fdat, &dfp)) != 0) 1647 goto out; 1648 1649 dvp = dfp->f_data; 1650 } 1651 1652 error = do_open(l, dvp, pb, flags, mode, fd); 1653 1654 if (dfp != NULL) 1655 fd_putfile(fdat); 1656 out: 1657 pathbuf_destroy(pb); 1658 return error; 1659 } 1660 1661 int 1662 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1663 { 1664 /* { 1665 syscallarg(const char *) path; 1666 syscallarg(int) flags; 1667 syscallarg(int) mode; 1668 } */ 1669 int error; 1670 int fd; 1671 1672 error = do_sys_openat(l, AT_FDCWD, SCARG(uap, path), 1673 SCARG(uap, flags), SCARG(uap, mode), &fd); 1674 1675 if (error == 0) 1676 *retval = fd; 1677 1678 return error; 1679 } 1680 1681 int 1682 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1683 { 1684 /* { 1685 syscallarg(int) fd; 1686 syscallarg(const char *) path; 1687 syscallarg(int) oflags; 1688 syscallarg(int) mode; 1689 } */ 1690 int error; 1691 int fd; 1692 1693 error = do_sys_openat(l, SCARG(uap, fd), SCARG(uap, path), 1694 SCARG(uap, oflags), SCARG(uap, mode), &fd); 1695 1696 if (error == 0) 1697 *retval = fd; 1698 1699 return error; 1700 } 1701 1702 static void 1703 vfs__fhfree(fhandle_t *fhp) 1704 { 1705 size_t fhsize; 1706 1707 if (fhp == NULL) { 1708 return; 1709 } 1710 fhsize = FHANDLE_SIZE(fhp); 1711 kmem_free(fhp, fhsize); 1712 } 1713 1714 /* 1715 * vfs_composefh: compose a filehandle. 
1716 */ 1717 1718 int 1719 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1720 { 1721 struct mount *mp; 1722 struct fid *fidp; 1723 int error; 1724 size_t needfhsize; 1725 size_t fidsize; 1726 1727 mp = vp->v_mount; 1728 fidp = NULL; 1729 if (*fh_size < FHANDLE_SIZE_MIN) { 1730 fidsize = 0; 1731 } else { 1732 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1733 if (fhp != NULL) { 1734 memset(fhp, 0, *fh_size); 1735 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1736 fidp = &fhp->fh_fid; 1737 } 1738 } 1739 error = VFS_VPTOFH(vp, fidp, &fidsize); 1740 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1741 if (error == 0 && *fh_size < needfhsize) { 1742 error = E2BIG; 1743 } 1744 *fh_size = needfhsize; 1745 return error; 1746 } 1747 1748 int 1749 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1750 { 1751 struct mount *mp; 1752 fhandle_t *fhp; 1753 size_t fhsize; 1754 size_t fidsize; 1755 int error; 1756 1757 *fhpp = NULL; 1758 mp = vp->v_mount; 1759 fidsize = 0; 1760 error = VFS_VPTOFH(vp, NULL, &fidsize); 1761 KASSERT(error != 0); 1762 if (error != E2BIG) { 1763 goto out; 1764 } 1765 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1766 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1767 if (fhp == NULL) { 1768 error = ENOMEM; 1769 goto out; 1770 } 1771 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1772 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1773 if (error == 0) { 1774 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1775 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1776 *fhpp = fhp; 1777 } else { 1778 kmem_free(fhp, fhsize); 1779 } 1780 out: 1781 return error; 1782 } 1783 1784 void 1785 vfs_composefh_free(fhandle_t *fhp) 1786 { 1787 1788 vfs__fhfree(fhp); 1789 } 1790 1791 /* 1792 * vfs_fhtovp: lookup a vnode by a filehandle. 
1793 */ 1794 1795 int 1796 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1797 { 1798 struct mount *mp; 1799 int error; 1800 1801 *vpp = NULL; 1802 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1803 if (mp == NULL) { 1804 error = ESTALE; 1805 goto out; 1806 } 1807 if (mp->mnt_op->vfs_fhtovp == NULL) { 1808 error = EOPNOTSUPP; 1809 goto out; 1810 } 1811 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1812 out: 1813 return error; 1814 } 1815 1816 /* 1817 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1818 * the needed size. 1819 */ 1820 1821 int 1822 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1823 { 1824 fhandle_t *fhp; 1825 int error; 1826 1827 *fhpp = NULL; 1828 if (fhsize > FHANDLE_SIZE_MAX) { 1829 return EINVAL; 1830 } 1831 if (fhsize < FHANDLE_SIZE_MIN) { 1832 return EINVAL; 1833 } 1834 again: 1835 fhp = kmem_alloc(fhsize, KM_SLEEP); 1836 if (fhp == NULL) { 1837 return ENOMEM; 1838 } 1839 error = copyin(ufhp, fhp, fhsize); 1840 if (error == 0) { 1841 /* XXX this check shouldn't be here */ 1842 if (FHANDLE_SIZE(fhp) == fhsize) { 1843 *fhpp = fhp; 1844 return 0; 1845 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1846 /* 1847 * a kludge for nfsv2 padded handles. 1848 */ 1849 size_t sz; 1850 1851 sz = FHANDLE_SIZE(fhp); 1852 kmem_free(fhp, fhsize); 1853 fhsize = sz; 1854 goto again; 1855 } else { 1856 /* 1857 * userland told us wrong size. 
1858 */ 1859 error = EINVAL; 1860 } 1861 } 1862 kmem_free(fhp, fhsize); 1863 return error; 1864 } 1865 1866 void 1867 vfs_copyinfh_free(fhandle_t *fhp) 1868 { 1869 1870 vfs__fhfree(fhp); 1871 } 1872 1873 /* 1874 * Get file handle system call 1875 */ 1876 int 1877 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1878 { 1879 /* { 1880 syscallarg(char *) fname; 1881 syscallarg(fhandle_t *) fhp; 1882 syscallarg(size_t *) fh_size; 1883 } */ 1884 struct vnode *vp; 1885 fhandle_t *fh; 1886 int error; 1887 struct pathbuf *pb; 1888 struct nameidata nd; 1889 size_t sz; 1890 size_t usz; 1891 1892 /* 1893 * Must be super user 1894 */ 1895 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1896 0, NULL, NULL, NULL); 1897 if (error) 1898 return (error); 1899 1900 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1901 if (error) { 1902 return error; 1903 } 1904 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1905 error = namei(&nd); 1906 if (error) { 1907 pathbuf_destroy(pb); 1908 return error; 1909 } 1910 vp = nd.ni_vp; 1911 pathbuf_destroy(pb); 1912 1913 error = vfs_composefh_alloc(vp, &fh); 1914 vput(vp); 1915 if (error != 0) { 1916 goto out; 1917 } 1918 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1919 if (error != 0) { 1920 goto out; 1921 } 1922 sz = FHANDLE_SIZE(fh); 1923 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1924 if (error != 0) { 1925 goto out; 1926 } 1927 if (usz >= sz) { 1928 error = copyout(fh, SCARG(uap, fhp), sz); 1929 } else { 1930 error = E2BIG; 1931 } 1932 out: 1933 vfs_composefh_free(fh); 1934 return (error); 1935 } 1936 1937 /* 1938 * Open a file given a file handle. 1939 * 1940 * Check permissions, allocate an open file structure, 1941 * and call the device open routine if any. 
1942 */ 1943 1944 int 1945 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1946 register_t *retval) 1947 { 1948 file_t *fp; 1949 struct vnode *vp = NULL; 1950 kauth_cred_t cred = l->l_cred; 1951 file_t *nfp; 1952 int indx, error = 0; 1953 struct vattr va; 1954 fhandle_t *fh; 1955 int flags; 1956 proc_t *p; 1957 1958 p = curproc; 1959 1960 /* 1961 * Must be super user 1962 */ 1963 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1964 0, NULL, NULL, NULL))) 1965 return (error); 1966 1967 if (oflags & O_SEARCH) { 1968 oflags &= ~(int)O_SEARCH; 1969 } 1970 1971 flags = FFLAGS(oflags); 1972 if ((flags & (FREAD | FWRITE)) == 0) 1973 return (EINVAL); 1974 if ((flags & O_CREAT)) 1975 return (EINVAL); 1976 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1977 return (error); 1978 fp = nfp; 1979 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1980 if (error != 0) { 1981 goto bad; 1982 } 1983 error = vfs_fhtovp(fh, &vp); 1984 if (error != 0) { 1985 goto bad; 1986 } 1987 1988 /* Now do an effective vn_open */ 1989 1990 if (vp->v_type == VSOCK) { 1991 error = EOPNOTSUPP; 1992 goto bad; 1993 } 1994 error = vn_openchk(vp, cred, flags); 1995 if (error != 0) 1996 goto bad; 1997 if (flags & O_TRUNC) { 1998 VOP_UNLOCK(vp); /* XXX */ 1999 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 2000 vattr_null(&va); 2001 va.va_size = 0; 2002 error = VOP_SETATTR(vp, &va, cred); 2003 if (error) 2004 goto bad; 2005 } 2006 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 2007 goto bad; 2008 if (flags & FWRITE) { 2009 mutex_enter(vp->v_interlock); 2010 vp->v_writecount++; 2011 mutex_exit(vp->v_interlock); 2012 } 2013 2014 /* done with modified vn_open, now finish what sys_open does. 
*/ 2015 if ((error = open_setfp(l, fp, vp, indx, flags))) 2016 return error; 2017 2018 VOP_UNLOCK(vp); 2019 *retval = indx; 2020 fd_affix(p, fp, indx); 2021 vfs_copyinfh_free(fh); 2022 return (0); 2023 2024 bad: 2025 fd_abort(p, fp, indx); 2026 if (vp != NULL) 2027 vput(vp); 2028 vfs_copyinfh_free(fh); 2029 return (error); 2030 } 2031 2032 int 2033 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 2034 { 2035 /* { 2036 syscallarg(const void *) fhp; 2037 syscallarg(size_t) fh_size; 2038 syscallarg(int) flags; 2039 } */ 2040 2041 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 2042 SCARG(uap, flags), retval); 2043 } 2044 2045 int 2046 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 2047 { 2048 int error; 2049 fhandle_t *fh; 2050 struct vnode *vp; 2051 2052 /* 2053 * Must be super user 2054 */ 2055 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2056 0, NULL, NULL, NULL))) 2057 return (error); 2058 2059 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2060 if (error != 0) 2061 return error; 2062 2063 error = vfs_fhtovp(fh, &vp); 2064 vfs_copyinfh_free(fh); 2065 if (error != 0) 2066 return error; 2067 2068 error = vn_stat(vp, sb); 2069 vput(vp); 2070 return error; 2071 } 2072 2073 2074 /* ARGSUSED */ 2075 int 2076 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 2077 { 2078 /* { 2079 syscallarg(const void *) fhp; 2080 syscallarg(size_t) fh_size; 2081 syscallarg(struct stat *) sb; 2082 } */ 2083 struct stat sb; 2084 int error; 2085 2086 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 2087 if (error) 2088 return error; 2089 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2090 } 2091 2092 int 2093 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 2094 int flags) 2095 { 2096 fhandle_t *fh; 2097 struct mount *mp; 2098 struct vnode *vp; 2099 int error; 2100 2101 /* 2102 * Must be super user 
2103 */ 2104 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 2105 0, NULL, NULL, NULL))) 2106 return error; 2107 2108 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 2109 if (error != 0) 2110 return error; 2111 2112 error = vfs_fhtovp(fh, &vp); 2113 vfs_copyinfh_free(fh); 2114 if (error != 0) 2115 return error; 2116 2117 mp = vp->v_mount; 2118 error = dostatvfs(mp, sb, l, flags, 1); 2119 vput(vp); 2120 return error; 2121 } 2122 2123 /* ARGSUSED */ 2124 int 2125 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2126 { 2127 /* { 2128 syscallarg(const void *) fhp; 2129 syscallarg(size_t) fh_size; 2130 syscallarg(struct statvfs *) buf; 2131 syscallarg(int) flags; 2132 } */ 2133 struct statvfs *sb = STATVFSBUF_GET(); 2134 int error; 2135 2136 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2137 SCARG(uap, flags)); 2138 if (error == 0) 2139 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2140 STATVFSBUF_PUT(sb); 2141 return error; 2142 } 2143 2144 /* 2145 * Create a special file. 
2146 */ 2147 /* ARGSUSED */ 2148 int 2149 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2150 register_t *retval) 2151 { 2152 /* { 2153 syscallarg(const char *) path; 2154 syscallarg(mode_t) mode; 2155 syscallarg(dev_t) dev; 2156 } */ 2157 return do_sys_mknodat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode), 2158 SCARG(uap, dev), retval, UIO_USERSPACE); 2159 } 2160 2161 int 2162 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2163 register_t *retval) 2164 { 2165 /* { 2166 syscallarg(int) fd; 2167 syscallarg(const char *) path; 2168 syscallarg(mode_t) mode; 2169 syscallarg(uint32_t) dev; 2170 } */ 2171 2172 return do_sys_mknodat(l, SCARG(uap, fd), SCARG(uap, path), 2173 SCARG(uap, mode), SCARG(uap, dev), retval, UIO_USERSPACE); 2174 } 2175 2176 int 2177 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2178 register_t *retval, enum uio_seg seg) 2179 { 2180 return do_sys_mknodat(l, AT_FDCWD, pathname, mode, dev, retval, seg); 2181 } 2182 2183 int 2184 do_sys_mknodat(struct lwp *l, int fdat, const char *pathname, mode_t mode, 2185 dev_t dev, register_t *retval, enum uio_seg seg) 2186 { 2187 struct proc *p = l->l_proc; 2188 struct vnode *vp; 2189 struct vattr vattr; 2190 int error, optype; 2191 struct pathbuf *pb; 2192 struct nameidata nd; 2193 const char *pathstring; 2194 2195 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2196 0, NULL, NULL, NULL)) != 0) 2197 return (error); 2198 2199 optype = VOP_MKNOD_DESCOFFSET; 2200 2201 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2202 if (error) { 2203 return error; 2204 } 2205 pathstring = pathbuf_stringcopy_get(pb); 2206 if (pathstring == NULL) { 2207 pathbuf_destroy(pb); 2208 return ENOMEM; 2209 } 2210 2211 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2212 2213 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2214 goto out; 2215 vp = nd.ni_vp; 2216 2217 if (vp != NULL) 2218 error = EEXIST; 2219 else { 2220 vattr_null(&vattr); 2221 /* We 
will read cwdi->cwdi_cmask unlocked. */ 2222 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2223 vattr.va_rdev = dev; 2224 2225 switch (mode & S_IFMT) { 2226 case S_IFMT: /* used by badsect to flag bad sectors */ 2227 vattr.va_type = VBAD; 2228 break; 2229 case S_IFCHR: 2230 vattr.va_type = VCHR; 2231 break; 2232 case S_IFBLK: 2233 vattr.va_type = VBLK; 2234 break; 2235 case S_IFWHT: 2236 optype = VOP_WHITEOUT_DESCOFFSET; 2237 break; 2238 case S_IFREG: 2239 #if NVERIEXEC > 0 2240 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2241 O_CREAT); 2242 #endif /* NVERIEXEC > 0 */ 2243 vattr.va_type = VREG; 2244 vattr.va_rdev = VNOVAL; 2245 optype = VOP_CREATE_DESCOFFSET; 2246 break; 2247 default: 2248 error = EINVAL; 2249 break; 2250 } 2251 } 2252 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2253 && vattr.va_rdev == VNOVAL) 2254 error = EINVAL; 2255 if (!error) { 2256 switch (optype) { 2257 case VOP_WHITEOUT_DESCOFFSET: 2258 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2259 if (error) 2260 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2261 vput(nd.ni_dvp); 2262 break; 2263 2264 case VOP_MKNOD_DESCOFFSET: 2265 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2266 &nd.ni_cnd, &vattr); 2267 if (error == 0) 2268 vput(nd.ni_vp); 2269 break; 2270 2271 case VOP_CREATE_DESCOFFSET: 2272 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2273 &nd.ni_cnd, &vattr); 2274 if (error == 0) 2275 vput(nd.ni_vp); 2276 break; 2277 } 2278 } else { 2279 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2280 if (nd.ni_dvp == vp) 2281 vrele(nd.ni_dvp); 2282 else 2283 vput(nd.ni_dvp); 2284 if (vp) 2285 vrele(vp); 2286 } 2287 out: 2288 pathbuf_stringcopy_put(pb, pathstring); 2289 pathbuf_destroy(pb); 2290 return (error); 2291 } 2292 2293 /* 2294 * Create a named pipe. 
2295 */ 2296 /* ARGSUSED */ 2297 int 2298 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2299 { 2300 /* { 2301 syscallarg(const char *) path; 2302 syscallarg(int) mode; 2303 } */ 2304 return do_sys_mkfifoat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap, mode)); 2305 } 2306 2307 int 2308 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2309 register_t *retval) 2310 { 2311 /* { 2312 syscallarg(int) fd; 2313 syscallarg(const char *) path; 2314 syscallarg(int) mode; 2315 } */ 2316 2317 return do_sys_mkfifoat(l, SCARG(uap, fd), SCARG(uap, path), 2318 SCARG(uap, mode)); 2319 } 2320 2321 static int 2322 do_sys_mkfifoat(struct lwp *l, int fdat, const char *path, mode_t mode) 2323 { 2324 struct proc *p = l->l_proc; 2325 struct vattr vattr; 2326 int error; 2327 struct pathbuf *pb; 2328 struct nameidata nd; 2329 2330 error = pathbuf_copyin(path, &pb); 2331 if (error) { 2332 return error; 2333 } 2334 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2335 2336 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2337 pathbuf_destroy(pb); 2338 return error; 2339 } 2340 if (nd.ni_vp != NULL) { 2341 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2342 if (nd.ni_dvp == nd.ni_vp) 2343 vrele(nd.ni_dvp); 2344 else 2345 vput(nd.ni_dvp); 2346 vrele(nd.ni_vp); 2347 pathbuf_destroy(pb); 2348 return (EEXIST); 2349 } 2350 vattr_null(&vattr); 2351 vattr.va_type = VFIFO; 2352 /* We will read cwdi->cwdi_cmask unlocked. */ 2353 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2354 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2355 if (error == 0) 2356 vput(nd.ni_vp); 2357 pathbuf_destroy(pb); 2358 return (error); 2359 } 2360 2361 /* 2362 * Make a hard file link. 
2363 */ 2364 /* ARGSUSED */ 2365 static int 2366 do_sys_linkat(struct lwp *l, int fdpath, const char *path, int fdlink, 2367 const char *link, int follow, register_t *retval) 2368 { 2369 struct vnode *vp; 2370 struct pathbuf *linkpb; 2371 struct nameidata nd; 2372 namei_simple_flags_t ns_flags; 2373 int error; 2374 2375 if (follow & AT_SYMLINK_FOLLOW) 2376 ns_flags = NSM_FOLLOW_TRYEMULROOT; 2377 else 2378 ns_flags = NSM_NOFOLLOW_TRYEMULROOT; 2379 2380 error = fd_nameiat_simple_user(l, fdpath, path, ns_flags, &vp); 2381 if (error != 0) 2382 return (error); 2383 error = pathbuf_copyin(link, &linkpb); 2384 if (error) { 2385 goto out1; 2386 } 2387 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2388 if ((error = fd_nameiat(l, fdlink, &nd)) != 0) 2389 goto out2; 2390 if (nd.ni_vp) { 2391 error = EEXIST; 2392 goto abortop; 2393 } 2394 /* Prevent hard links on directories. */ 2395 if (vp->v_type == VDIR) { 2396 error = EPERM; 2397 goto abortop; 2398 } 2399 /* Prevent cross-mount operation. 
*/ 2400 if (nd.ni_dvp->v_mount != vp->v_mount) { 2401 error = EXDEV; 2402 goto abortop; 2403 } 2404 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2405 out2: 2406 pathbuf_destroy(linkpb); 2407 out1: 2408 vrele(vp); 2409 return (error); 2410 abortop: 2411 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2412 if (nd.ni_dvp == nd.ni_vp) 2413 vrele(nd.ni_dvp); 2414 else 2415 vput(nd.ni_dvp); 2416 if (nd.ni_vp != NULL) 2417 vrele(nd.ni_vp); 2418 goto out2; 2419 } 2420 2421 int 2422 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2423 { 2424 /* { 2425 syscallarg(const char *) path; 2426 syscallarg(const char *) link; 2427 } */ 2428 const char *path = SCARG(uap, path); 2429 const char *link = SCARG(uap, link); 2430 2431 return do_sys_linkat(l, AT_FDCWD, path, AT_FDCWD, link, 2432 AT_SYMLINK_FOLLOW, retval); 2433 } 2434 2435 int 2436 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2437 register_t *retval) 2438 { 2439 /* { 2440 syscallarg(int) fd1; 2441 syscallarg(const char *) name1; 2442 syscallarg(int) fd2; 2443 syscallarg(const char *) name2; 2444 syscallarg(int) flags; 2445 } */ 2446 int fd1 = SCARG(uap, fd1); 2447 const char *name1 = SCARG(uap, name1); 2448 int fd2 = SCARG(uap, fd2); 2449 const char *name2 = SCARG(uap, name2); 2450 int follow; 2451 2452 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2453 2454 return do_sys_linkat(l, fd1, name1, fd2, name2, follow, retval); 2455 } 2456 2457 2458 int 2459 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2460 { 2461 return do_sys_symlinkat(NULL, patharg, AT_FDCWD, link, seg); 2462 } 2463 2464 static int 2465 do_sys_symlinkat(struct lwp *l, const char *patharg, int fdat, 2466 const char *link, enum uio_seg seg) 2467 { 2468 struct proc *p = curproc; 2469 struct vattr vattr; 2470 char *path; 2471 int error; 2472 struct pathbuf *linkpb; 2473 struct nameidata nd; 2474 2475 KASSERT(l != NULL || fdat == AT_FDCWD); 2476 2477 path = PNBUF_GET(); 2478 if (seg == UIO_USERSPACE) { 
2479 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2480 goto out1; 2481 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2482 goto out1; 2483 } else { 2484 KASSERT(strlen(patharg) < MAXPATHLEN); 2485 strcpy(path, patharg); 2486 linkpb = pathbuf_create(link); 2487 if (linkpb == NULL) { 2488 error = ENOMEM; 2489 goto out1; 2490 } 2491 } 2492 ktrkuser("symlink-target", path, strlen(path)); 2493 2494 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2495 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2496 goto out2; 2497 if (nd.ni_vp) { 2498 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2499 if (nd.ni_dvp == nd.ni_vp) 2500 vrele(nd.ni_dvp); 2501 else 2502 vput(nd.ni_dvp); 2503 vrele(nd.ni_vp); 2504 error = EEXIST; 2505 goto out2; 2506 } 2507 vattr_null(&vattr); 2508 vattr.va_type = VLNK; 2509 /* We will read cwdi->cwdi_cmask unlocked. */ 2510 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2511 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2512 if (error == 0) 2513 vput(nd.ni_vp); 2514 out2: 2515 pathbuf_destroy(linkpb); 2516 out1: 2517 PNBUF_PUT(path); 2518 return (error); 2519 } 2520 2521 /* 2522 * Make a symbolic link. 2523 */ 2524 /* ARGSUSED */ 2525 int 2526 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2527 { 2528 /* { 2529 syscallarg(const char *) path; 2530 syscallarg(const char *) link; 2531 } */ 2532 2533 return do_sys_symlinkat(l, SCARG(uap, path), AT_FDCWD, SCARG(uap, link), 2534 UIO_USERSPACE); 2535 } 2536 2537 int 2538 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2539 register_t *retval) 2540 { 2541 /* { 2542 syscallarg(const char *) path1; 2543 syscallarg(int) fd; 2544 syscallarg(const char *) path2; 2545 } */ 2546 2547 return do_sys_symlinkat(l, SCARG(uap, path1), SCARG(uap, fd), 2548 SCARG(uap, path2), UIO_USERSPACE); 2549 } 2550 2551 /* 2552 * Delete a whiteout from the filesystem. 
2553 */ 2554 /* ARGSUSED */ 2555 int 2556 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2557 { 2558 /* { 2559 syscallarg(const char *) path; 2560 } */ 2561 int error; 2562 struct pathbuf *pb; 2563 struct nameidata nd; 2564 2565 error = pathbuf_copyin(SCARG(uap, path), &pb); 2566 if (error) { 2567 return error; 2568 } 2569 2570 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2571 error = namei(&nd); 2572 if (error) { 2573 pathbuf_destroy(pb); 2574 return (error); 2575 } 2576 2577 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2578 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2579 if (nd.ni_dvp == nd.ni_vp) 2580 vrele(nd.ni_dvp); 2581 else 2582 vput(nd.ni_dvp); 2583 if (nd.ni_vp) 2584 vrele(nd.ni_vp); 2585 pathbuf_destroy(pb); 2586 return (EEXIST); 2587 } 2588 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2589 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2590 vput(nd.ni_dvp); 2591 pathbuf_destroy(pb); 2592 return (error); 2593 } 2594 2595 /* 2596 * Delete a name from the filesystem. 
2597 */ 2598 /* ARGSUSED */ 2599 int 2600 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2601 { 2602 /* { 2603 syscallarg(const char *) path; 2604 } */ 2605 2606 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 0, UIO_USERSPACE); 2607 } 2608 2609 int 2610 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2611 register_t *retval) 2612 { 2613 /* { 2614 syscallarg(int) fd; 2615 syscallarg(const char *) path; 2616 syscallarg(int) flag; 2617 } */ 2618 2619 return do_sys_unlinkat(l, SCARG(uap, fd), SCARG(uap, path), 2620 SCARG(uap, flag), UIO_USERSPACE); 2621 } 2622 2623 int 2624 do_sys_unlink(const char *arg, enum uio_seg seg) 2625 { 2626 return do_sys_unlinkat(NULL, AT_FDCWD, arg, 0, seg); 2627 } 2628 2629 static int 2630 do_sys_unlinkat(struct lwp *l, int fdat, const char *arg, int flags, 2631 enum uio_seg seg) 2632 { 2633 struct vnode *vp; 2634 int error; 2635 struct pathbuf *pb; 2636 struct nameidata nd; 2637 const char *pathstring; 2638 2639 KASSERT(l != NULL || fdat == AT_FDCWD); 2640 2641 error = pathbuf_maybe_copyin(arg, seg, &pb); 2642 if (error) { 2643 return error; 2644 } 2645 pathstring = pathbuf_stringcopy_get(pb); 2646 if (pathstring == NULL) { 2647 pathbuf_destroy(pb); 2648 return ENOMEM; 2649 } 2650 2651 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2652 if ((error = fd_nameiat(l, fdat, &nd)) != 0) 2653 goto out; 2654 vp = nd.ni_vp; 2655 2656 /* 2657 * The root of a mounted filesystem cannot be deleted. 2658 */ 2659 if ((vp->v_vflag & VV_ROOT) != 0) { 2660 error = EBUSY; 2661 goto abort; 2662 } 2663 2664 if ((vp->v_type == VDIR) && (vp->v_mountedhere != NULL)) { 2665 error = EBUSY; 2666 goto abort; 2667 } 2668 2669 /* 2670 * No rmdir "." please. 
2671 */ 2672 if (nd.ni_dvp == vp) { 2673 error = EINVAL; 2674 goto abort; 2675 } 2676 2677 /* 2678 * AT_REMOVEDIR is required to remove a directory 2679 */ 2680 if (vp->v_type == VDIR) { 2681 if (!(flags & AT_REMOVEDIR)) { 2682 error = EPERM; 2683 goto abort; 2684 } else { 2685 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2686 goto out; 2687 } 2688 } 2689 2690 /* 2691 * Starting here we only deal with non directories. 2692 */ 2693 if (flags & AT_REMOVEDIR) { 2694 error = ENOTDIR; 2695 goto abort; 2696 } 2697 2698 2699 #if NVERIEXEC > 0 2700 /* Handle remove requests for veriexec entries. */ 2701 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2702 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2703 if (nd.ni_dvp == vp) 2704 vrele(nd.ni_dvp); 2705 else 2706 vput(nd.ni_dvp); 2707 vput(vp); 2708 goto out; 2709 } 2710 #endif /* NVERIEXEC > 0 */ 2711 2712 #ifdef FILEASSOC 2713 (void)fileassoc_file_delete(vp); 2714 #endif /* FILEASSOC */ 2715 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2716 goto out; 2717 2718 abort: 2719 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2720 if (nd.ni_dvp == vp) 2721 vrele(nd.ni_dvp); 2722 else 2723 vput(nd.ni_dvp); 2724 vput(vp); 2725 2726 out: 2727 pathbuf_stringcopy_put(pb, pathstring); 2728 pathbuf_destroy(pb); 2729 return (error); 2730 } 2731 2732 /* 2733 * Reposition read/write file offset. 
2734 */ 2735 int 2736 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2737 { 2738 /* { 2739 syscallarg(int) fd; 2740 syscallarg(int) pad; 2741 syscallarg(off_t) offset; 2742 syscallarg(int) whence; 2743 } */ 2744 kauth_cred_t cred = l->l_cred; 2745 file_t *fp; 2746 struct vnode *vp; 2747 struct vattr vattr; 2748 off_t newoff; 2749 int error, fd; 2750 2751 fd = SCARG(uap, fd); 2752 2753 if ((fp = fd_getfile(fd)) == NULL) 2754 return (EBADF); 2755 2756 vp = fp->f_data; 2757 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2758 error = ESPIPE; 2759 goto out; 2760 } 2761 2762 switch (SCARG(uap, whence)) { 2763 case SEEK_CUR: 2764 newoff = fp->f_offset + SCARG(uap, offset); 2765 break; 2766 case SEEK_END: 2767 vn_lock(vp, LK_SHARED | LK_RETRY); 2768 error = VOP_GETATTR(vp, &vattr, cred); 2769 VOP_UNLOCK(vp); 2770 if (error) { 2771 goto out; 2772 } 2773 newoff = SCARG(uap, offset) + vattr.va_size; 2774 break; 2775 case SEEK_SET: 2776 newoff = SCARG(uap, offset); 2777 break; 2778 default: 2779 error = EINVAL; 2780 goto out; 2781 } 2782 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2783 *(off_t *)retval = fp->f_offset = newoff; 2784 } 2785 out: 2786 fd_putfile(fd); 2787 return (error); 2788 } 2789 2790 /* 2791 * Positional read system call. 
2792 */ 2793 int 2794 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2795 { 2796 /* { 2797 syscallarg(int) fd; 2798 syscallarg(void *) buf; 2799 syscallarg(size_t) nbyte; 2800 syscallarg(off_t) offset; 2801 } */ 2802 file_t *fp; 2803 struct vnode *vp; 2804 off_t offset; 2805 int error, fd = SCARG(uap, fd); 2806 2807 if ((fp = fd_getfile(fd)) == NULL) 2808 return (EBADF); 2809 2810 if ((fp->f_flag & FREAD) == 0) { 2811 fd_putfile(fd); 2812 return (EBADF); 2813 } 2814 2815 vp = fp->f_data; 2816 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2817 error = ESPIPE; 2818 goto out; 2819 } 2820 2821 offset = SCARG(uap, offset); 2822 2823 /* 2824 * XXX This works because no file systems actually 2825 * XXX take any action on the seek operation. 2826 */ 2827 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2828 goto out; 2829 2830 /* dofileread() will unuse the descriptor for us */ 2831 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2832 &offset, 0, retval)); 2833 2834 out: 2835 fd_putfile(fd); 2836 return (error); 2837 } 2838 2839 /* 2840 * Positional scatter read system call. 2841 */ 2842 int 2843 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2844 { 2845 /* { 2846 syscallarg(int) fd; 2847 syscallarg(const struct iovec *) iovp; 2848 syscallarg(int) iovcnt; 2849 syscallarg(off_t) offset; 2850 } */ 2851 off_t offset = SCARG(uap, offset); 2852 2853 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2854 SCARG(uap, iovcnt), &offset, 0, retval); 2855 } 2856 2857 /* 2858 * Positional write system call. 
2859 */ 2860 int 2861 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2862 { 2863 /* { 2864 syscallarg(int) fd; 2865 syscallarg(const void *) buf; 2866 syscallarg(size_t) nbyte; 2867 syscallarg(off_t) offset; 2868 } */ 2869 file_t *fp; 2870 struct vnode *vp; 2871 off_t offset; 2872 int error, fd = SCARG(uap, fd); 2873 2874 if ((fp = fd_getfile(fd)) == NULL) 2875 return (EBADF); 2876 2877 if ((fp->f_flag & FWRITE) == 0) { 2878 fd_putfile(fd); 2879 return (EBADF); 2880 } 2881 2882 vp = fp->f_data; 2883 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2884 error = ESPIPE; 2885 goto out; 2886 } 2887 2888 offset = SCARG(uap, offset); 2889 2890 /* 2891 * XXX This works because no file systems actually 2892 * XXX take any action on the seek operation. 2893 */ 2894 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2895 goto out; 2896 2897 /* dofilewrite() will unuse the descriptor for us */ 2898 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2899 &offset, 0, retval)); 2900 2901 out: 2902 fd_putfile(fd); 2903 return (error); 2904 } 2905 2906 /* 2907 * Positional gather write system call. 2908 */ 2909 int 2910 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2911 { 2912 /* { 2913 syscallarg(int) fd; 2914 syscallarg(const struct iovec *) iovp; 2915 syscallarg(int) iovcnt; 2916 syscallarg(off_t) offset; 2917 } */ 2918 off_t offset = SCARG(uap, offset); 2919 2920 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2921 SCARG(uap, iovcnt), &offset, 0, retval); 2922 } 2923 2924 /* 2925 * Check access permissions. 
2926 */ 2927 int 2928 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2929 { 2930 /* { 2931 syscallarg(const char *) path; 2932 syscallarg(int) flags; 2933 } */ 2934 2935 return do_sys_accessat(l, AT_FDCWD, SCARG(uap, path), 2936 SCARG(uap, flags), 0); 2937 } 2938 2939 static int 2940 do_sys_accessat(struct lwp *l, int fdat, const char *path, 2941 int mode, int flags) 2942 { 2943 kauth_cred_t cred; 2944 struct vnode *vp; 2945 int error, nd_flag, vmode; 2946 struct pathbuf *pb; 2947 struct nameidata nd; 2948 2949 CTASSERT(F_OK == 0); 2950 if ((mode & ~(R_OK | W_OK | X_OK)) != 0) { 2951 /* nonsense mode */ 2952 return EINVAL; 2953 } 2954 2955 nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; 2956 if (flags & AT_SYMLINK_NOFOLLOW) 2957 nd_flag &= ~FOLLOW; 2958 2959 error = pathbuf_copyin(path, &pb); 2960 if (error) 2961 return error; 2962 2963 NDINIT(&nd, LOOKUP, nd_flag, pb); 2964 2965 /* Override default credentials */ 2966 cred = kauth_cred_dup(l->l_cred); 2967 if (!(flags & AT_EACCESS)) { 2968 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2969 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2970 } 2971 nd.ni_cnd.cn_cred = cred; 2972 2973 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 2974 pathbuf_destroy(pb); 2975 goto out; 2976 } 2977 vp = nd.ni_vp; 2978 pathbuf_destroy(pb); 2979 2980 /* Flags == 0 means only check for existence. 
*/ 2981 if (mode) { 2982 vmode = 0; 2983 if (mode & R_OK) 2984 vmode |= VREAD; 2985 if (mode & W_OK) 2986 vmode |= VWRITE; 2987 if (mode & X_OK) 2988 vmode |= VEXEC; 2989 2990 error = VOP_ACCESS(vp, vmode, cred); 2991 if (!error && (vmode & VWRITE)) 2992 error = vn_writechk(vp); 2993 } 2994 vput(vp); 2995 out: 2996 kauth_cred_free(cred); 2997 return (error); 2998 } 2999 3000 int 3001 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 3002 register_t *retval) 3003 { 3004 /* { 3005 syscallarg(int) fd; 3006 syscallarg(const char *) path; 3007 syscallarg(int) amode; 3008 syscallarg(int) flag; 3009 } */ 3010 3011 return do_sys_accessat(l, SCARG(uap, fd), SCARG(uap, path), 3012 SCARG(uap, amode), SCARG(uap, flag)); 3013 } 3014 3015 /* 3016 * Common code for all sys_stat functions, including compat versions. 3017 */ 3018 int 3019 do_sys_stat(const char *userpath, unsigned int nd_flag, 3020 struct stat *sb) 3021 { 3022 return do_sys_statat(NULL, AT_FDCWD, userpath, nd_flag, sb); 3023 } 3024 3025 static int 3026 do_sys_statat(struct lwp *l, int fdat, const char *userpath, 3027 unsigned int nd_flag, struct stat *sb) 3028 { 3029 int error; 3030 struct pathbuf *pb; 3031 struct nameidata nd; 3032 3033 KASSERT(l != NULL || fdat == AT_FDCWD); 3034 3035 error = pathbuf_copyin(userpath, &pb); 3036 if (error) { 3037 return error; 3038 } 3039 3040 NDINIT(&nd, LOOKUP, nd_flag | LOCKLEAF | TRYEMULROOT, pb); 3041 3042 error = fd_nameiat(l, fdat, &nd); 3043 if (error != 0) { 3044 pathbuf_destroy(pb); 3045 return error; 3046 } 3047 error = vn_stat(nd.ni_vp, sb); 3048 vput(nd.ni_vp); 3049 pathbuf_destroy(pb); 3050 return error; 3051 } 3052 3053 /* 3054 * Get file status; this version follows links. 
3055 */ 3056 /* ARGSUSED */ 3057 int 3058 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 3059 { 3060 /* { 3061 syscallarg(const char *) path; 3062 syscallarg(struct stat *) ub; 3063 } */ 3064 struct stat sb; 3065 int error; 3066 3067 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), FOLLOW, &sb); 3068 if (error) 3069 return error; 3070 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3071 } 3072 3073 /* 3074 * Get file status; this version does not follow links. 3075 */ 3076 /* ARGSUSED */ 3077 int 3078 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 3079 { 3080 /* { 3081 syscallarg(const char *) path; 3082 syscallarg(struct stat *) ub; 3083 } */ 3084 struct stat sb; 3085 int error; 3086 3087 error = do_sys_statat(l, AT_FDCWD, SCARG(uap, path), NOFOLLOW, &sb); 3088 if (error) 3089 return error; 3090 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 3091 } 3092 3093 int 3094 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 3095 register_t *retval) 3096 { 3097 /* { 3098 syscallarg(int) fd; 3099 syscallarg(const char *) path; 3100 syscallarg(struct stat *) buf; 3101 syscallarg(int) flag; 3102 } */ 3103 unsigned int nd_flag; 3104 struct stat sb; 3105 int error; 3106 3107 if (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) 3108 nd_flag = NOFOLLOW; 3109 else 3110 nd_flag = FOLLOW; 3111 3112 error = do_sys_statat(l, SCARG(uap, fd), SCARG(uap, path), nd_flag, 3113 &sb); 3114 if (error) 3115 return error; 3116 return copyout(&sb, SCARG(uap, buf), sizeof(sb)); 3117 } 3118 3119 /* 3120 * Get configurable pathname variables. 
3121 */ 3122 /* ARGSUSED */ 3123 int 3124 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 3125 { 3126 /* { 3127 syscallarg(const char *) path; 3128 syscallarg(int) name; 3129 } */ 3130 int error; 3131 struct pathbuf *pb; 3132 struct nameidata nd; 3133 3134 error = pathbuf_copyin(SCARG(uap, path), &pb); 3135 if (error) { 3136 return error; 3137 } 3138 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3139 if ((error = namei(&nd)) != 0) { 3140 pathbuf_destroy(pb); 3141 return (error); 3142 } 3143 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 3144 vput(nd.ni_vp); 3145 pathbuf_destroy(pb); 3146 return (error); 3147 } 3148 3149 /* 3150 * Return target name of a symbolic link. 3151 */ 3152 /* ARGSUSED */ 3153 int 3154 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, 3155 register_t *retval) 3156 { 3157 /* { 3158 syscallarg(const char *) path; 3159 syscallarg(char *) buf; 3160 syscallarg(size_t) count; 3161 } */ 3162 return do_sys_readlinkat(l, AT_FDCWD, SCARG(uap, path), 3163 SCARG(uap, buf), SCARG(uap, count), retval); 3164 } 3165 3166 static int 3167 do_sys_readlinkat(struct lwp *l, int fdat, const char *path, char *buf, 3168 size_t count, register_t *retval) 3169 { 3170 struct vnode *vp; 3171 struct iovec aiov; 3172 struct uio auio; 3173 int error; 3174 struct pathbuf *pb; 3175 struct nameidata nd; 3176 3177 error = pathbuf_copyin(path, &pb); 3178 if (error) { 3179 return error; 3180 } 3181 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 3182 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 3183 pathbuf_destroy(pb); 3184 return error; 3185 } 3186 vp = nd.ni_vp; 3187 pathbuf_destroy(pb); 3188 if (vp->v_type != VLNK) 3189 error = EINVAL; 3190 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 3191 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 3192 aiov.iov_base = buf; 3193 aiov.iov_len = count; 3194 auio.uio_iov = &aiov; 3195 auio.uio_iovcnt = 1; 3196 auio.uio_offset = 0; 3197 
auio.uio_rw = UIO_READ; 3198 KASSERT(l == curlwp); 3199 auio.uio_vmspace = l->l_proc->p_vmspace; 3200 auio.uio_resid = count; 3201 error = VOP_READLINK(vp, &auio, l->l_cred); 3202 } 3203 vput(vp); 3204 *retval = count - auio.uio_resid; 3205 return (error); 3206 } 3207 3208 int 3209 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 3210 register_t *retval) 3211 { 3212 /* { 3213 syscallarg(int) fd; 3214 syscallarg(const char *) path; 3215 syscallarg(char *) buf; 3216 syscallarg(size_t) bufsize; 3217 } */ 3218 3219 return do_sys_readlinkat(l, SCARG(uap, fd), SCARG(uap, path), 3220 SCARG(uap, buf), SCARG(uap, bufsize), retval); 3221 } 3222 3223 /* 3224 * Change flags of a file given a path name. 3225 */ 3226 /* ARGSUSED */ 3227 int 3228 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 3229 { 3230 /* { 3231 syscallarg(const char *) path; 3232 syscallarg(u_long) flags; 3233 } */ 3234 struct vnode *vp; 3235 int error; 3236 3237 error = namei_simple_user(SCARG(uap, path), 3238 NSM_FOLLOW_TRYEMULROOT, &vp); 3239 if (error != 0) 3240 return (error); 3241 error = change_flags(vp, SCARG(uap, flags), l); 3242 vput(vp); 3243 return (error); 3244 } 3245 3246 /* 3247 * Change flags of a file given a file descriptor. 3248 */ 3249 /* ARGSUSED */ 3250 int 3251 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3252 { 3253 /* { 3254 syscallarg(int) fd; 3255 syscallarg(u_long) flags; 3256 } */ 3257 struct vnode *vp; 3258 file_t *fp; 3259 int error; 3260 3261 /* fd_getvnode() will use the descriptor for us */ 3262 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3263 return (error); 3264 vp = fp->f_data; 3265 error = change_flags(vp, SCARG(uap, flags), l); 3266 VOP_UNLOCK(vp); 3267 fd_putfile(SCARG(uap, fd)); 3268 return (error); 3269 } 3270 3271 /* 3272 * Change flags of a file given a path name; this version does 3273 * not follow links. 
3274 */ 3275 int 3276 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3277 { 3278 /* { 3279 syscallarg(const char *) path; 3280 syscallarg(u_long) flags; 3281 } */ 3282 struct vnode *vp; 3283 int error; 3284 3285 error = namei_simple_user(SCARG(uap, path), 3286 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3287 if (error != 0) 3288 return (error); 3289 error = change_flags(vp, SCARG(uap, flags), l); 3290 vput(vp); 3291 return (error); 3292 } 3293 3294 /* 3295 * Common routine to change flags of a file. 3296 */ 3297 int 3298 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3299 { 3300 struct vattr vattr; 3301 int error; 3302 3303 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3304 3305 vattr_null(&vattr); 3306 vattr.va_flags = flags; 3307 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3308 3309 return (error); 3310 } 3311 3312 /* 3313 * Change mode of a file given path name; this version follows links. 3314 */ 3315 /* ARGSUSED */ 3316 int 3317 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3318 { 3319 /* { 3320 syscallarg(const char *) path; 3321 syscallarg(int) mode; 3322 } */ 3323 return do_sys_chmodat(l, AT_FDCWD, SCARG(uap, path), 3324 SCARG(uap, mode), 0); 3325 } 3326 3327 static int 3328 do_sys_chmodat(struct lwp *l, int fdat, const char *path, int mode, int flags) 3329 { 3330 int error; 3331 struct vnode *vp; 3332 namei_simple_flags_t ns_flag; 3333 3334 if (flags & AT_SYMLINK_NOFOLLOW) 3335 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3336 else 3337 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3338 3339 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3340 if (error != 0) 3341 return error; 3342 3343 error = change_mode(vp, mode, l); 3344 3345 vrele(vp); 3346 3347 return (error); 3348 } 3349 3350 /* 3351 * Change mode of a file given a file descriptor. 
3352 */ 3353 /* ARGSUSED */ 3354 int 3355 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3356 { 3357 /* { 3358 syscallarg(int) fd; 3359 syscallarg(int) mode; 3360 } */ 3361 file_t *fp; 3362 int error; 3363 3364 /* fd_getvnode() will use the descriptor for us */ 3365 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3366 return (error); 3367 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3368 fd_putfile(SCARG(uap, fd)); 3369 return (error); 3370 } 3371 3372 int 3373 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3374 register_t *retval) 3375 { 3376 /* { 3377 syscallarg(int) fd; 3378 syscallarg(const char *) path; 3379 syscallarg(int) mode; 3380 syscallarg(int) flag; 3381 } */ 3382 3383 return do_sys_chmodat(l, SCARG(uap, fd), SCARG(uap, path), 3384 SCARG(uap, mode), SCARG(uap, flag)); 3385 } 3386 3387 /* 3388 * Change mode of a file given path name; this version does not follow links. 3389 */ 3390 /* ARGSUSED */ 3391 int 3392 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3393 { 3394 /* { 3395 syscallarg(const char *) path; 3396 syscallarg(int) mode; 3397 } */ 3398 int error; 3399 struct vnode *vp; 3400 3401 error = namei_simple_user(SCARG(uap, path), 3402 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3403 if (error != 0) 3404 return (error); 3405 3406 error = change_mode(vp, SCARG(uap, mode), l); 3407 3408 vrele(vp); 3409 return (error); 3410 } 3411 3412 /* 3413 * Common routine to set mode given a vnode. 3414 */ 3415 static int 3416 change_mode(struct vnode *vp, int mode, struct lwp *l) 3417 { 3418 struct vattr vattr; 3419 int error; 3420 3421 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3422 vattr_null(&vattr); 3423 vattr.va_mode = mode & ALLPERMS; 3424 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3425 VOP_UNLOCK(vp); 3426 return (error); 3427 } 3428 3429 /* 3430 * Set ownership given a path name; this version follows links. 
3431 */ 3432 /* ARGSUSED */ 3433 int 3434 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3435 { 3436 /* { 3437 syscallarg(const char *) path; 3438 syscallarg(uid_t) uid; 3439 syscallarg(gid_t) gid; 3440 } */ 3441 return do_sys_chownat(l, AT_FDCWD, SCARG(uap, path), SCARG(uap,uid), 3442 SCARG(uap, gid), 0); 3443 } 3444 3445 static int 3446 do_sys_chownat(struct lwp *l, int fdat, const char *path, uid_t uid, 3447 gid_t gid, int flags) 3448 { 3449 int error; 3450 struct vnode *vp; 3451 namei_simple_flags_t ns_flag; 3452 3453 if (flags & AT_SYMLINK_NOFOLLOW) 3454 ns_flag = NSM_NOFOLLOW_TRYEMULROOT; 3455 else 3456 ns_flag = NSM_FOLLOW_TRYEMULROOT; 3457 3458 error = fd_nameiat_simple_user(l, fdat, path, ns_flag, &vp); 3459 if (error != 0) 3460 return error; 3461 3462 error = change_owner(vp, uid, gid, l, 0); 3463 3464 vrele(vp); 3465 3466 return (error); 3467 } 3468 3469 /* 3470 * Set ownership given a path name; this version follows links. 3471 * Provides POSIX semantics. 3472 */ 3473 /* ARGSUSED */ 3474 int 3475 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3476 { 3477 /* { 3478 syscallarg(const char *) path; 3479 syscallarg(uid_t) uid; 3480 syscallarg(gid_t) gid; 3481 } */ 3482 int error; 3483 struct vnode *vp; 3484 3485 error = namei_simple_user(SCARG(uap, path), 3486 NSM_FOLLOW_TRYEMULROOT, &vp); 3487 if (error != 0) 3488 return (error); 3489 3490 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3491 3492 vrele(vp); 3493 return (error); 3494 } 3495 3496 /* 3497 * Set ownership given a file descriptor. 
3498 */ 3499 /* ARGSUSED */ 3500 int 3501 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3502 { 3503 /* { 3504 syscallarg(int) fd; 3505 syscallarg(uid_t) uid; 3506 syscallarg(gid_t) gid; 3507 } */ 3508 int error; 3509 file_t *fp; 3510 3511 /* fd_getvnode() will use the descriptor for us */ 3512 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3513 return (error); 3514 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3515 l, 0); 3516 fd_putfile(SCARG(uap, fd)); 3517 return (error); 3518 } 3519 3520 int 3521 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3522 register_t *retval) 3523 { 3524 /* { 3525 syscallarg(int) fd; 3526 syscallarg(const char *) path; 3527 syscallarg(uid_t) owner; 3528 syscallarg(gid_t) group; 3529 syscallarg(int) flag; 3530 } */ 3531 3532 return do_sys_chownat(l, SCARG(uap, fd), SCARG(uap, path), 3533 SCARG(uap, owner), SCARG(uap, group), 3534 SCARG(uap, flag)); 3535 } 3536 3537 /* 3538 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3539 */ 3540 /* ARGSUSED */ 3541 int 3542 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3543 { 3544 /* { 3545 syscallarg(int) fd; 3546 syscallarg(uid_t) uid; 3547 syscallarg(gid_t) gid; 3548 } */ 3549 int error; 3550 file_t *fp; 3551 3552 /* fd_getvnode() will use the descriptor for us */ 3553 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3554 return (error); 3555 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3556 l, 1); 3557 fd_putfile(SCARG(uap, fd)); 3558 return (error); 3559 } 3560 3561 /* 3562 * Set ownership given a path name; this version does not follow links. 
3563 */ 3564 /* ARGSUSED */ 3565 int 3566 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3567 { 3568 /* { 3569 syscallarg(const char *) path; 3570 syscallarg(uid_t) uid; 3571 syscallarg(gid_t) gid; 3572 } */ 3573 int error; 3574 struct vnode *vp; 3575 3576 error = namei_simple_user(SCARG(uap, path), 3577 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3578 if (error != 0) 3579 return (error); 3580 3581 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3582 3583 vrele(vp); 3584 return (error); 3585 } 3586 3587 /* 3588 * Set ownership given a path name; this version does not follow links. 3589 * Provides POSIX/XPG semantics. 3590 */ 3591 /* ARGSUSED */ 3592 int 3593 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3594 { 3595 /* { 3596 syscallarg(const char *) path; 3597 syscallarg(uid_t) uid; 3598 syscallarg(gid_t) gid; 3599 } */ 3600 int error; 3601 struct vnode *vp; 3602 3603 error = namei_simple_user(SCARG(uap, path), 3604 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3605 if (error != 0) 3606 return (error); 3607 3608 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3609 3610 vrele(vp); 3611 return (error); 3612 } 3613 3614 /* 3615 * Common routine to set ownership given a vnode. 3616 */ 3617 static int 3618 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3619 int posix_semantics) 3620 { 3621 struct vattr vattr; 3622 mode_t newmode; 3623 int error; 3624 3625 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3626 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3627 goto out; 3628 3629 #define CHANGED(x) ((int)(x) != -1) 3630 newmode = vattr.va_mode; 3631 if (posix_semantics) { 3632 /* 3633 * POSIX/XPG semantics: if the caller is not the super-user, 3634 * clear set-user-id and set-group-id bits. 
Both POSIX and 3635 * the XPG consider the behaviour for calls by the super-user 3636 * implementation-defined; we leave the set-user-id and set- 3637 * group-id settings intact in that case. 3638 */ 3639 if (vattr.va_mode & S_ISUID) { 3640 if (kauth_authorize_vnode(l->l_cred, 3641 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) 3642 newmode &= ~S_ISUID; 3643 } 3644 if (vattr.va_mode & S_ISGID) { 3645 if (kauth_authorize_vnode(l->l_cred, 3646 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) 3647 newmode &= ~S_ISGID; 3648 } 3649 } else { 3650 /* 3651 * NetBSD semantics: when changing owner and/or group, 3652 * clear the respective bit(s). 3653 */ 3654 if (CHANGED(uid)) 3655 newmode &= ~S_ISUID; 3656 if (CHANGED(gid)) 3657 newmode &= ~S_ISGID; 3658 } 3659 /* Update va_mode iff altered. */ 3660 if (vattr.va_mode == newmode) 3661 newmode = VNOVAL; 3662 3663 vattr_null(&vattr); 3664 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3665 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3666 vattr.va_mode = newmode; 3667 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3668 #undef CHANGED 3669 3670 out: 3671 VOP_UNLOCK(vp); 3672 return (error); 3673 } 3674 3675 /* 3676 * Set the access and modification times given a path name; this 3677 * version follows links. 3678 */ 3679 /* ARGSUSED */ 3680 int 3681 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3682 register_t *retval) 3683 { 3684 /* { 3685 syscallarg(const char *) path; 3686 syscallarg(const struct timeval *) tptr; 3687 } */ 3688 3689 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3690 SCARG(uap, tptr), UIO_USERSPACE); 3691 } 3692 3693 /* 3694 * Set the access and modification times given a file descriptor. 
3695 */ 3696 /* ARGSUSED */ 3697 int 3698 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3699 register_t *retval) 3700 { 3701 /* { 3702 syscallarg(int) fd; 3703 syscallarg(const struct timeval *) tptr; 3704 } */ 3705 int error; 3706 file_t *fp; 3707 3708 /* fd_getvnode() will use the descriptor for us */ 3709 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3710 return (error); 3711 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3712 UIO_USERSPACE); 3713 fd_putfile(SCARG(uap, fd)); 3714 return (error); 3715 } 3716 3717 int 3718 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3719 register_t *retval) 3720 { 3721 /* { 3722 syscallarg(int) fd; 3723 syscallarg(const struct timespec *) tptr; 3724 } */ 3725 int error; 3726 file_t *fp; 3727 3728 /* fd_getvnode() will use the descriptor for us */ 3729 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3730 return (error); 3731 error = do_sys_utimensat(l, AT_FDCWD, fp->f_data, NULL, 0, 3732 SCARG(uap, tptr), UIO_USERSPACE); 3733 fd_putfile(SCARG(uap, fd)); 3734 return (error); 3735 } 3736 3737 /* 3738 * Set the access and modification times given a path name; this 3739 * version does not follow links. 3740 */ 3741 int 3742 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3743 register_t *retval) 3744 { 3745 /* { 3746 syscallarg(const char *) path; 3747 syscallarg(const struct timeval *) tptr; 3748 } */ 3749 3750 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3751 SCARG(uap, tptr), UIO_USERSPACE); 3752 } 3753 3754 int 3755 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3756 register_t *retval) 3757 { 3758 /* { 3759 syscallarg(int) fd; 3760 syscallarg(const char *) path; 3761 syscallarg(const struct timespec *) tptr; 3762 syscallarg(int) flag; 3763 } */ 3764 int follow; 3765 const struct timespec *tptr; 3766 int error; 3767 3768 tptr = SCARG(uap, tptr); 3769 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 3770 3771 error = do_sys_utimensat(l, SCARG(uap, fd), NULL, 3772 SCARG(uap, path), follow, tptr, UIO_USERSPACE); 3773 3774 return error; 3775 } 3776 3777 /* 3778 * Common routine to set access and modification times given a vnode. 3779 */ 3780 int 3781 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3782 const struct timespec *tptr, enum uio_seg seg) 3783 { 3784 return do_sys_utimensat(l, AT_FDCWD, vp, path, flag, tptr, seg); 3785 } 3786 3787 static int 3788 do_sys_utimensat(struct lwp *l, int fdat, struct vnode *vp, 3789 const char *path, int flag, const struct timespec *tptr, enum uio_seg seg) 3790 { 3791 struct vattr vattr; 3792 int error, dorele = 0; 3793 namei_simple_flags_t sflags; 3794 bool vanull, setbirthtime; 3795 struct timespec ts[2]; 3796 3797 KASSERT(l != NULL || fdat == AT_FDCWD); 3798 3799 /* 3800 * I have checked all callers and they pass either FOLLOW, 3801 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3802 * is 0. More to the point, they don't pass anything else. 3803 * Let's keep it that way at least until the namei interfaces 3804 * are fully sanitized. 3805 */ 3806 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3807 sflags = (flag == FOLLOW) ? 
3808 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3809 3810 if (tptr == NULL) { 3811 vanull = true; 3812 nanotime(&ts[0]); 3813 ts[1] = ts[0]; 3814 } else { 3815 vanull = false; 3816 if (seg != UIO_SYSSPACE) { 3817 error = copyin(tptr, ts, sizeof (ts)); 3818 if (error != 0) 3819 return error; 3820 } else { 3821 ts[0] = tptr[0]; 3822 ts[1] = tptr[1]; 3823 } 3824 } 3825 3826 if (ts[0].tv_nsec == UTIME_NOW) { 3827 nanotime(&ts[0]); 3828 if (ts[1].tv_nsec == UTIME_NOW) { 3829 vanull = true; 3830 ts[1] = ts[0]; 3831 } 3832 } else if (ts[1].tv_nsec == UTIME_NOW) 3833 nanotime(&ts[1]); 3834 3835 if (vp == NULL) { 3836 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3837 error = fd_nameiat_simple_user(l, fdat, path, sflags, &vp); 3838 if (error != 0) 3839 return error; 3840 dorele = 1; 3841 } 3842 3843 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3844 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3845 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3846 vattr_null(&vattr); 3847 3848 if (ts[0].tv_nsec != UTIME_OMIT) 3849 vattr.va_atime = ts[0]; 3850 3851 if (ts[1].tv_nsec != UTIME_OMIT) { 3852 vattr.va_mtime = ts[1]; 3853 if (setbirthtime) 3854 vattr.va_birthtime = ts[1]; 3855 } 3856 3857 if (vanull) 3858 vattr.va_vaflags |= VA_UTIMES_NULL; 3859 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3860 VOP_UNLOCK(vp); 3861 3862 if (dorele != 0) 3863 vrele(vp); 3864 3865 return error; 3866 } 3867 3868 int 3869 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3870 const struct timeval *tptr, enum uio_seg seg) 3871 { 3872 struct timespec ts[2]; 3873 struct timespec *tsptr = NULL; 3874 int error; 3875 3876 if (tptr != NULL) { 3877 struct timeval tv[2]; 3878 3879 if (seg != UIO_SYSSPACE) { 3880 error = copyin(tptr, tv, sizeof (tv)); 3881 if (error != 0) 3882 return error; 3883 tptr = tv; 3884 } 3885 3886 if ((tv[0].tv_usec == UTIME_NOW) || 3887 (tv[0].tv_usec == UTIME_OMIT)) 3888 ts[0].tv_nsec = tv[0].tv_usec; 3889 else 3890 
TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3891 3892 if ((tv[1].tv_usec == UTIME_NOW) || 3893 (tv[1].tv_usec == UTIME_OMIT)) 3894 ts[1].tv_nsec = tv[1].tv_usec; 3895 else 3896 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3897 3898 tsptr = &ts[0]; 3899 } 3900 3901 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3902 } 3903 3904 /* 3905 * Truncate a file given its path name. 3906 */ 3907 /* ARGSUSED */ 3908 int 3909 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3910 { 3911 /* { 3912 syscallarg(const char *) path; 3913 syscallarg(int) pad; 3914 syscallarg(off_t) length; 3915 } */ 3916 struct vnode *vp; 3917 struct vattr vattr; 3918 int error; 3919 3920 error = namei_simple_user(SCARG(uap, path), 3921 NSM_FOLLOW_TRYEMULROOT, &vp); 3922 if (error != 0) 3923 return (error); 3924 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3925 if (vp->v_type == VDIR) 3926 error = EISDIR; 3927 else if ((error = vn_writechk(vp)) == 0 && 3928 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3929 vattr_null(&vattr); 3930 vattr.va_size = SCARG(uap, length); 3931 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3932 } 3933 vput(vp); 3934 return (error); 3935 } 3936 3937 /* 3938 * Truncate a file given a file descriptor. 
3939 */ 3940 /* ARGSUSED */ 3941 int 3942 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3943 { 3944 /* { 3945 syscallarg(int) fd; 3946 syscallarg(int) pad; 3947 syscallarg(off_t) length; 3948 } */ 3949 struct vattr vattr; 3950 struct vnode *vp; 3951 file_t *fp; 3952 int error; 3953 3954 /* fd_getvnode() will use the descriptor for us */ 3955 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3956 return (error); 3957 if ((fp->f_flag & FWRITE) == 0) { 3958 error = EINVAL; 3959 goto out; 3960 } 3961 vp = fp->f_data; 3962 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3963 if (vp->v_type == VDIR) 3964 error = EISDIR; 3965 else if ((error = vn_writechk(vp)) == 0) { 3966 vattr_null(&vattr); 3967 vattr.va_size = SCARG(uap, length); 3968 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3969 } 3970 VOP_UNLOCK(vp); 3971 out: 3972 fd_putfile(SCARG(uap, fd)); 3973 return (error); 3974 } 3975 3976 /* 3977 * Sync an open file. 3978 */ 3979 /* ARGSUSED */ 3980 int 3981 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3982 { 3983 /* { 3984 syscallarg(int) fd; 3985 } */ 3986 struct vnode *vp; 3987 file_t *fp; 3988 int error; 3989 3990 /* fd_getvnode() will use the descriptor for us */ 3991 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3992 return (error); 3993 vp = fp->f_data; 3994 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3995 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3996 VOP_UNLOCK(vp); 3997 fd_putfile(SCARG(uap, fd)); 3998 return (error); 3999 } 4000 4001 /* 4002 * Sync a range of file data. API modeled after that found in AIX. 4003 * 4004 * FDATASYNC indicates that we need only save enough metadata to be able 4005 * to re-read the written data. Note we duplicate AIX's requirement that 4006 * the file be open for writing. 
4007 */ 4008 /* ARGSUSED */ 4009 int 4010 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 4011 { 4012 /* { 4013 syscallarg(int) fd; 4014 syscallarg(int) flags; 4015 syscallarg(off_t) start; 4016 syscallarg(off_t) length; 4017 } */ 4018 struct vnode *vp; 4019 file_t *fp; 4020 int flags, nflags; 4021 off_t s, e, len; 4022 int error; 4023 4024 /* fd_getvnode() will use the descriptor for us */ 4025 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4026 return (error); 4027 4028 if ((fp->f_flag & FWRITE) == 0) { 4029 error = EBADF; 4030 goto out; 4031 } 4032 4033 flags = SCARG(uap, flags); 4034 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 4035 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 4036 error = EINVAL; 4037 goto out; 4038 } 4039 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 4040 if (flags & FDATASYNC) 4041 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 4042 else 4043 nflags = FSYNC_WAIT; 4044 if (flags & FDISKSYNC) 4045 nflags |= FSYNC_CACHE; 4046 4047 len = SCARG(uap, length); 4048 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 4049 if (len) { 4050 s = SCARG(uap, start); 4051 e = s + len; 4052 if (e < s) { 4053 error = EINVAL; 4054 goto out; 4055 } 4056 } else { 4057 e = 0; 4058 s = 0; 4059 } 4060 4061 vp = fp->f_data; 4062 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4063 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 4064 VOP_UNLOCK(vp); 4065 out: 4066 fd_putfile(SCARG(uap, fd)); 4067 return (error); 4068 } 4069 4070 /* 4071 * Sync the data of an open file. 
4072 */ 4073 /* ARGSUSED */ 4074 int 4075 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 4076 { 4077 /* { 4078 syscallarg(int) fd; 4079 } */ 4080 struct vnode *vp; 4081 file_t *fp; 4082 int error; 4083 4084 /* fd_getvnode() will use the descriptor for us */ 4085 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4086 return (error); 4087 if ((fp->f_flag & FWRITE) == 0) { 4088 fd_putfile(SCARG(uap, fd)); 4089 return (EBADF); 4090 } 4091 vp = fp->f_data; 4092 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4093 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 4094 VOP_UNLOCK(vp); 4095 fd_putfile(SCARG(uap, fd)); 4096 return (error); 4097 } 4098 4099 /* 4100 * Rename files, (standard) BSD semantics frontend. 4101 */ 4102 /* ARGSUSED */ 4103 int 4104 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 4105 { 4106 /* { 4107 syscallarg(const char *) from; 4108 syscallarg(const char *) to; 4109 } */ 4110 4111 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4112 SCARG(uap, to), UIO_USERSPACE, 0)); 4113 } 4114 4115 int 4116 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 4117 register_t *retval) 4118 { 4119 /* { 4120 syscallarg(int) fromfd; 4121 syscallarg(const char *) from; 4122 syscallarg(int) tofd; 4123 syscallarg(const char *) to; 4124 } */ 4125 4126 return (do_sys_renameat(l, SCARG(uap, fromfd), SCARG(uap, from), 4127 SCARG(uap, tofd), SCARG(uap, to), UIO_USERSPACE, 0)); 4128 } 4129 4130 /* 4131 * Rename files, POSIX semantics frontend. 4132 */ 4133 /* ARGSUSED */ 4134 int 4135 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 4136 { 4137 /* { 4138 syscallarg(const char *) from; 4139 syscallarg(const char *) to; 4140 } */ 4141 4142 return (do_sys_renameat(l, AT_FDCWD, SCARG(uap, from), AT_FDCWD, 4143 SCARG(uap, to), UIO_USERSPACE, 1)); 4144 } 4145 4146 /* 4147 * Rename files. 
Source and destination must either both be directories, 4148 * or both not be directories. If target is a directory, it must be empty. 4149 * If `from' and `to' refer to the same object, the value of the `retain' 4150 * argument is used to determine whether `from' will be 4151 * 4152 * (retain == 0) deleted unless `from' and `to' refer to the same 4153 * object in the file system's name space (BSD). 4154 * (retain == 1) always retained (POSIX). 4155 * 4156 * XXX Synchronize with nfsrv_rename in nfs_serv.c. 4157 */ 4158 int 4159 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 4160 { 4161 return do_sys_renameat(NULL, AT_FDCWD, from, AT_FDCWD, to, seg, retain); 4162 } 4163 4164 static int 4165 do_sys_renameat(struct lwp *l, int fromfd, const char *from, int tofd, 4166 const char *to, enum uio_seg seg, int retain) 4167 { 4168 struct pathbuf *fpb, *tpb; 4169 struct nameidata fnd, tnd; 4170 struct vnode *fdvp, *fvp; 4171 struct vnode *tdvp, *tvp; 4172 struct mount *mp, *tmp; 4173 int error; 4174 4175 KASSERT(l != NULL || (fromfd == AT_FDCWD && tofd == AT_FDCWD)); 4176 KASSERT(from != NULL); 4177 KASSERT(to != NULL); 4178 4179 error = pathbuf_maybe_copyin(from, seg, &fpb); 4180 if (error) 4181 goto out0; 4182 KASSERT(fpb != NULL); 4183 4184 error = pathbuf_maybe_copyin(to, seg, &tpb); 4185 if (error) 4186 goto out1; 4187 KASSERT(tpb != NULL); 4188 4189 /* 4190 * Lookup from. 4191 * 4192 * XXX LOCKPARENT is wrong because we don't actually want it 4193 * locked yet, but (a) namei is insane, and (b) VOP_RENAME is 4194 * insane, so for the time being we need to leave it like this. 4195 */ 4196 NDINIT(&fnd, DELETE, (LOCKPARENT | TRYEMULROOT | INRENAME), fpb); 4197 if ((error = fd_nameiat(l, fromfd, &fnd)) != 0) 4198 goto out2; 4199 4200 /* 4201 * Pull out the important results of the lookup, fdvp and fvp. 4202 * Of course, fvp is bogus because we're about to unlock fdvp. 
4203 */ 4204 fdvp = fnd.ni_dvp; 4205 fvp = fnd.ni_vp; 4206 KASSERT(fdvp != NULL); 4207 KASSERT(fvp != NULL); 4208 KASSERT((fdvp == fvp) || (VOP_ISLOCKED(fdvp) == LK_EXCLUSIVE)); 4209 4210 /* 4211 * Make sure neither fdvp nor fvp is locked. 4212 */ 4213 if (fdvp != fvp) 4214 VOP_UNLOCK(fdvp); 4215 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4216 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4217 4218 /* 4219 * Reject renaming `.' and `..'. Can't do this until after 4220 * namei because we need namei's parsing to find the final 4221 * component name. (namei should just leave us with the final 4222 * component name and not look it up itself, but anyway...) 4223 * 4224 * This was here before because we used to relookup from 4225 * instead of to and relookup requires the caller to check 4226 * this, but now file systems may depend on this check, so we 4227 * must retain it until the file systems are all rototilled. 4228 */ 4229 if (((fnd.ni_cnd.cn_namelen == 1) && 4230 (fnd.ni_cnd.cn_nameptr[0] == '.')) || 4231 ((fnd.ni_cnd.cn_namelen == 2) && 4232 (fnd.ni_cnd.cn_nameptr[0] == '.') && 4233 (fnd.ni_cnd.cn_nameptr[1] == '.'))) { 4234 error = EINVAL; /* XXX EISDIR? */ 4235 goto abort0; 4236 } 4237 4238 /* 4239 * Lookup to. 4240 * 4241 * XXX LOCKPARENT is wrong, but...insanity, &c. Also, using 4242 * fvp here to decide whether to add CREATEDIR is a load of 4243 * bollocks because fvp might be the wrong node by now, since 4244 * fdvp is unlocked. 4245 * 4246 * XXX Why not pass CREATEDIR always? 4247 */ 4248 NDINIT(&tnd, RENAME, 4249 (LOCKPARENT | NOCACHE | TRYEMULROOT | INRENAME | 4250 ((fvp->v_type == VDIR)? CREATEDIR : 0)), 4251 tpb); 4252 if ((error = fd_nameiat(l, tofd, &tnd)) != 0) 4253 goto abort0; 4254 4255 /* 4256 * Pull out the important results of the lookup, tdvp and tvp. 4257 * Of course, tvp is bogus because we're about to unlock tdvp. 
4258 */ 4259 tdvp = tnd.ni_dvp; 4260 tvp = tnd.ni_vp; 4261 KASSERT(tdvp != NULL); 4262 KASSERT((tdvp == tvp) || (VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE)); 4263 4264 /* 4265 * Make sure neither tdvp nor tvp is locked. 4266 */ 4267 if (tdvp != tvp) 4268 VOP_UNLOCK(tdvp); 4269 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4270 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4271 4272 /* 4273 * Reject renaming onto `.' or `..'. relookup is unhappy with 4274 * these, which is why we must do this here. Once upon a time 4275 * we relooked up from instead of to, and consequently didn't 4276 * need this check, but now that we relookup to instead of 4277 * from, we need this; and we shall need it forever forward 4278 * until the VOP_RENAME protocol changes, because file systems 4279 * will no doubt begin to depend on this check. 4280 */ 4281 if (((tnd.ni_cnd.cn_namelen == 1) && 4282 (tnd.ni_cnd.cn_nameptr[0] == '.')) || 4283 ((tnd.ni_cnd.cn_namelen == 2) && 4284 (tnd.ni_cnd.cn_nameptr[0] == '.') && 4285 (tnd.ni_cnd.cn_nameptr[1] == '.'))) { 4286 error = EINVAL; /* XXX EISDIR? */ 4287 goto abort1; 4288 } 4289 4290 /* 4291 * Get the mount point. If the file system has been unmounted, 4292 * which it may be because we're not holding any vnode locks, 4293 * then v_mount will be NULL. We're not really supposed to 4294 * read v_mount without holding the vnode lock, but since we 4295 * have fdvp referenced, if fdvp->v_mount changes then at worst 4296 * it will be set to NULL, not changed to another mount point. 4297 * And, of course, since it is up to the file system to 4298 * determine the real lock order, we can't lock both fdvp and 4299 * tdvp at the same time. 4300 */ 4301 mp = fdvp->v_mount; 4302 if (mp == NULL) { 4303 error = ENOENT; 4304 goto abort1; 4305 } 4306 4307 /* 4308 * Make sure the mount points match. 
Again, although we don't 4309 * hold any vnode locks, the v_mount fields may change -- but 4310 * at worst they will change to NULL, so this will never become 4311 * a cross-device rename, because we hold vnode references. 4312 * 4313 * XXX Because nothing is locked and the compiler may reorder 4314 * things here, unmounting the file system at an inopportune 4315 * moment may cause rename to fail with ENXDEV when it really 4316 * should fail with ENOENT. 4317 */ 4318 tmp = tdvp->v_mount; 4319 if (tmp == NULL) { 4320 error = ENOENT; 4321 goto abort1; 4322 } 4323 4324 if (mp != tmp) { 4325 error = EXDEV; 4326 goto abort1; 4327 } 4328 4329 /* 4330 * Take the vfs rename lock to avoid cross-directory screw cases. 4331 * Nothing is locked currently, so taking this lock is safe. 4332 */ 4333 error = VFS_RENAMELOCK_ENTER(mp); 4334 if (error) 4335 goto abort1; 4336 4337 /* 4338 * Now fdvp, fvp, tdvp, and (if nonnull) tvp are referenced, 4339 * and nothing is locked except for the vfs rename lock. 4340 * 4341 * The next step is a little rain dance to conform to the 4342 * insane lock protocol, even though it does nothing to ward 4343 * off race conditions. 4344 * 4345 * We need tdvp and tvp to be locked. However, because we have 4346 * unlocked tdvp in order to hold no locks while we take the 4347 * vfs rename lock, tvp may be wrong here, and we can't safely 4348 * lock it even if the sensible file systems will just unlock 4349 * it straight away. Consequently, we must lock tdvp and then 4350 * relookup tvp to get it locked. 4351 * 4352 * Finally, because the VOP_RENAME protocol is brain-damaged 4353 * and various file systems insanely depend on the semantics of 4354 * this brain damage, the lookup of to must be the last lookup 4355 * before VOP_RENAME. 
4356 */ 4357 vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); 4358 error = relookup(tdvp, &tnd.ni_vp, &tnd.ni_cnd, 0); 4359 if (error) 4360 goto abort2; 4361 4362 /* 4363 * Drop the old tvp and pick up the new one -- which might be 4364 * the same, but that doesn't matter to us. After this, tdvp 4365 * and tvp should both be locked. 4366 */ 4367 if (tvp != NULL) 4368 vrele(tvp); 4369 tvp = tnd.ni_vp; 4370 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4371 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4372 4373 /* 4374 * The old do_sys_rename had various consistency checks here 4375 * involving fvp and tvp. fvp is bogus already here, and tvp 4376 * will become bogus soon in any sensible file system, so the 4377 * only purpose in putting these checks here is to give lip 4378 * service to these screw cases and to acknowledge that they 4379 * exist, not actually to handle them, but here you go 4380 * anyway... 4381 */ 4382 4383 /* 4384 * Acknowledge that directories and non-directories aren't 4385 * suposed to mix. 4386 */ 4387 if (tvp != NULL) { 4388 if ((fvp->v_type == VDIR) && (tvp->v_type != VDIR)) { 4389 error = ENOTDIR; 4390 goto abort3; 4391 } else if ((fvp->v_type != VDIR) && (tvp->v_type == VDIR)) { 4392 error = EISDIR; 4393 goto abort3; 4394 } 4395 } 4396 4397 /* 4398 * Acknowledge some random screw case, among the dozens that 4399 * might arise. 4400 */ 4401 if (fvp == tdvp) { 4402 error = EINVAL; 4403 goto abort3; 4404 } 4405 4406 /* 4407 * Acknowledge that POSIX has a wacky screw case. 4408 * 4409 * XXX Eventually the retain flag needs to be passed on to 4410 * VOP_RENAME. 4411 */ 4412 if (fvp == tvp) { 4413 if (retain) { 4414 error = 0; 4415 goto abort3; 4416 } else if ((fdvp == tdvp) && 4417 (fnd.ni_cnd.cn_namelen == tnd.ni_cnd.cn_namelen) && 4418 (0 == memcmp(fnd.ni_cnd.cn_nameptr, tnd.ni_cnd.cn_nameptr, 4419 fnd.ni_cnd.cn_namelen))) { 4420 error = 0; 4421 goto abort3; 4422 } 4423 } 4424 4425 /* 4426 * Make sure veriexec can screw us up. 
(But a race can screw 4427 * up veriexec, of course -- remember, fvp and (soon) tvp are 4428 * bogus.) 4429 */ 4430 #if NVERIEXEC > 0 4431 { 4432 char *f1, *f2; 4433 size_t f1_len; 4434 size_t f2_len; 4435 4436 f1_len = fnd.ni_cnd.cn_namelen + 1; 4437 f1 = kmem_alloc(f1_len, KM_SLEEP); 4438 strlcpy(f1, fnd.ni_cnd.cn_nameptr, f1_len); 4439 4440 f2_len = tnd.ni_cnd.cn_namelen + 1; 4441 f2 = kmem_alloc(f2_len, KM_SLEEP); 4442 strlcpy(f2, tnd.ni_cnd.cn_nameptr, f2_len); 4443 4444 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 4445 4446 kmem_free(f1, f1_len); 4447 kmem_free(f2, f2_len); 4448 4449 if (error) 4450 goto abort3; 4451 } 4452 #endif /* NVERIEXEC > 0 */ 4453 4454 /* 4455 * All ready. Incant the rename vop. 4456 */ 4457 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4458 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4459 KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); 4460 KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); 4461 error = VOP_RENAME(fdvp, fvp, &fnd.ni_cnd, tdvp, tvp, &tnd.ni_cnd); 4462 4463 /* 4464 * VOP_RENAME releases fdvp, fvp, tdvp, and tvp, and unlocks 4465 * tdvp and tvp. But we can't assert any of that. 4466 */ 4467 /* XXX KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ 4468 /* XXX KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ 4469 /* XXX KASSERT(VOP_ISLOCKED(tdvp) != LK_EXCLUSIVE); */ 4470 /* XXX KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) != LK_EXCLUSIVE)); */ 4471 4472 /* 4473 * So all we have left to do is to drop the rename lock and 4474 * destroy the pathbufs. 
4475 */ 4476 VFS_RENAMELOCK_EXIT(mp); 4477 goto out2; 4478 4479 abort3: if ((tvp != NULL) && (tvp != tdvp)) 4480 VOP_UNLOCK(tvp); 4481 abort2: VOP_UNLOCK(tdvp); 4482 VFS_RENAMELOCK_EXIT(mp); 4483 abort1: VOP_ABORTOP(tdvp, &tnd.ni_cnd); 4484 vrele(tdvp); 4485 if (tvp != NULL) 4486 vrele(tvp); 4487 abort0: VOP_ABORTOP(fdvp, &fnd.ni_cnd); 4488 vrele(fdvp); 4489 vrele(fvp); 4490 out2: pathbuf_destroy(tpb); 4491 out1: pathbuf_destroy(fpb); 4492 out0: return error; 4493 } 4494 4495 /* 4496 * Make a directory file. 4497 */ 4498 /* ARGSUSED */ 4499 int 4500 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4501 { 4502 /* { 4503 syscallarg(const char *) path; 4504 syscallarg(int) mode; 4505 } */ 4506 4507 return do_sys_mkdirat(l, AT_FDCWD, SCARG(uap, path), 4508 SCARG(uap, mode), UIO_USERSPACE); 4509 } 4510 4511 int 4512 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4513 register_t *retval) 4514 { 4515 /* { 4516 syscallarg(int) fd; 4517 syscallarg(const char *) path; 4518 syscallarg(int) mode; 4519 } */ 4520 4521 return do_sys_mkdirat(l, SCARG(uap, fd), SCARG(uap, path), 4522 SCARG(uap, mode), UIO_USERSPACE); 4523 } 4524 4525 4526 int 4527 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4528 { 4529 return do_sys_mkdirat(NULL, AT_FDCWD, path, mode, UIO_USERSPACE); 4530 } 4531 4532 static int 4533 do_sys_mkdirat(struct lwp *l, int fdat, const char *path, mode_t mode, 4534 enum uio_seg seg) 4535 { 4536 struct proc *p = curlwp->l_proc; 4537 struct vnode *vp; 4538 struct vattr vattr; 4539 int error; 4540 struct pathbuf *pb; 4541 struct nameidata nd; 4542 4543 KASSERT(l != NULL || fdat == AT_FDCWD); 4544 4545 /* XXX bollocks, should pass in a pathbuf */ 4546 error = pathbuf_maybe_copyin(path, seg, &pb); 4547 if (error) { 4548 return error; 4549 } 4550 4551 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4552 4553 if ((error = fd_nameiat(l, fdat, &nd)) != 0) { 4554 pathbuf_destroy(pb); 4555 return (error); 
4556 } 4557 vp = nd.ni_vp; 4558 if (vp != NULL) { 4559 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4560 if (nd.ni_dvp == vp) 4561 vrele(nd.ni_dvp); 4562 else 4563 vput(nd.ni_dvp); 4564 vrele(vp); 4565 pathbuf_destroy(pb); 4566 return (EEXIST); 4567 } 4568 vattr_null(&vattr); 4569 vattr.va_type = VDIR; 4570 /* We will read cwdi->cwdi_cmask unlocked. */ 4571 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4572 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4573 if (!error) 4574 vput(nd.ni_vp); 4575 pathbuf_destroy(pb); 4576 return (error); 4577 } 4578 4579 /* 4580 * Remove a directory file. 4581 */ 4582 /* ARGSUSED */ 4583 int 4584 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4585 { 4586 return do_sys_unlinkat(l, AT_FDCWD, SCARG(uap, path), 4587 AT_REMOVEDIR, UIO_USERSPACE); 4588 } 4589 4590 /* 4591 * Read a block of directory entries in a file system independent format. 4592 */ 4593 int 4594 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4595 { 4596 /* { 4597 syscallarg(int) fd; 4598 syscallarg(char *) buf; 4599 syscallarg(size_t) count; 4600 } */ 4601 file_t *fp; 4602 int error, done; 4603 4604 /* fd_getvnode() will use the descriptor for us */ 4605 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4606 return (error); 4607 if ((fp->f_flag & FREAD) == 0) { 4608 error = EBADF; 4609 goto out; 4610 } 4611 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4612 SCARG(uap, count), &done, l, 0, 0); 4613 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4614 *retval = done; 4615 out: 4616 fd_putfile(SCARG(uap, fd)); 4617 return (error); 4618 } 4619 4620 /* 4621 * Set the mode mask for creation of filesystem nodes. 
4622 */ 4623 int 4624 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4625 { 4626 /* { 4627 syscallarg(mode_t) newmask; 4628 } */ 4629 struct proc *p = l->l_proc; 4630 struct cwdinfo *cwdi; 4631 4632 /* 4633 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4634 * important is that we serialize changes to the mask. The 4635 * rw_exit() will issue a write memory barrier on our behalf, 4636 * and force the changes out to other CPUs (as it must use an 4637 * atomic operation, draining the local CPU's store buffers). 4638 */ 4639 cwdi = p->p_cwdi; 4640 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4641 *retval = cwdi->cwdi_cmask; 4642 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4643 rw_exit(&cwdi->cwdi_lock); 4644 4645 return (0); 4646 } 4647 4648 int 4649 dorevoke(struct vnode *vp, kauth_cred_t cred) 4650 { 4651 struct vattr vattr; 4652 int error, fs_decision; 4653 4654 vn_lock(vp, LK_SHARED | LK_RETRY); 4655 error = VOP_GETATTR(vp, &vattr, cred); 4656 VOP_UNLOCK(vp); 4657 if (error != 0) 4658 return error; 4659 fs_decision = (kauth_cred_geteuid(cred) == vattr.va_uid) ? 0 : EPERM; 4660 error = kauth_authorize_vnode(cred, KAUTH_VNODE_REVOKE, vp, NULL, 4661 fs_decision); 4662 if (!error) 4663 VOP_REVOKE(vp, REVOKEALL); 4664 return (error); 4665 } 4666 4667 /* 4668 * Void all references to file by ripping underlying filesystem 4669 * away from vnode. 4670 */ 4671 /* ARGSUSED */ 4672 int 4673 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4674 { 4675 /* { 4676 syscallarg(const char *) path; 4677 } */ 4678 struct vnode *vp; 4679 int error; 4680 4681 error = namei_simple_user(SCARG(uap, path), 4682 NSM_FOLLOW_TRYEMULROOT, &vp); 4683 if (error != 0) 4684 return (error); 4685 error = dorevoke(vp, l->l_cred); 4686 vrele(vp); 4687 return (error); 4688 } 4689