1 /* $NetBSD: vfs_syscalls.c,v 1.442 2011/12/02 12:30:14 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68 /* 69 * Virtual File System System Calls 70 */ 71 72 #include <sys/cdefs.h> 73 __KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.442 2011/12/02 12:30:14 yamt Exp $"); 74 75 #ifdef _KERNEL_OPT 76 #include "opt_fileassoc.h" 77 #include "veriexec.h" 78 #endif 79 80 #include <sys/param.h> 81 #include <sys/systm.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/kernel.h> 85 #include <sys/file.h> 86 #include <sys/fcntl.h> 87 #include <sys/stat.h> 88 #include <sys/vnode.h> 89 #include <sys/mount.h> 90 #include <sys/proc.h> 91 #include <sys/uio.h> 92 #include <sys/kmem.h> 93 #include <sys/dirent.h> 94 #include <sys/sysctl.h> 95 #include <sys/syscallargs.h> 96 #include <sys/vfs_syscalls.h> 97 #include <sys/ktrace.h> 98 #ifdef FILEASSOC 99 #include <sys/fileassoc.h> 100 #endif /* FILEASSOC */ 101 #include <sys/extattr.h> 102 #include <sys/verified_exec.h> 103 #include <sys/kauth.h> 104 #include <sys/atomic.h> 105 #include <sys/module.h> 106 #include <sys/buf.h> 107 108 #include <miscfs/genfs/genfs.h> 109 #include <miscfs/syncfs/syncfs.h> 110 #include <miscfs/specfs/specdev.h> 111 112 #include <nfs/rpcv2.h> 113 #include <nfs/nfsproto.h> 114 #include <nfs/nfs.h> 115 #include <nfs/nfs_var.h> 116 117 static int change_flags(struct vnode *, u_long, struct lwp *); 118 static int change_mode(struct vnode *, int, struct lwp *l); 119 static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 120 121 /* 122 * This table is used to maintain compatibility with 4.3BSD 123 * and NetBSD 0.9 mount syscalls - and possibly other systems. 124 * Note, the order is important! 125 * 126 * Do not modify this table. It should only contain filesystems 127 * supported by NetBSD 0.9 and 4.3BSD. 128 */ 129 const char * const mountcompatnames[] = { 130 NULL, /* 0 = MOUNT_NONE */ 131 MOUNT_FFS, /* 1 = MOUNT_UFS */ 132 MOUNT_NFS, /* 2 */ 133 MOUNT_MFS, /* 3 */ 134 MOUNT_MSDOS, /* 4 */ 135 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 136 MOUNT_FDESC, /* 6 */ 137 MOUNT_KERNFS, /* 7 */ 138 NULL, /* 8 = MOUNT_DEVFS */ 139 MOUNT_AFS, /* 9 */ 140 }; 141 142 const int nmountcompatnames = __arraycount(mountcompatnames); 143 144 static int 145 open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 146 { 147 int error; 148 149 fp->f_flag = flags & FMASK; 150 fp->f_type = DTYPE_VNODE; 151 fp->f_ops = &vnops; 152 fp->f_data = vp; 153 154 if (flags & (O_EXLOCK | O_SHLOCK)) { 155 struct flock lf; 156 int type; 157 158 lf.l_whence = SEEK_SET; 159 lf.l_start = 0; 160 lf.l_len = 0; 161 if (flags & O_EXLOCK) 162 lf.l_type = F_WRLCK; 163 else 164 lf.l_type = F_RDLCK; 165 type = F_FLOCK; 166 if ((flags & FNONBLOCK) == 0) 167 type |= F_WAIT; 168 VOP_UNLOCK(vp); 169 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 170 if (error) { 171 (void) vn_close(vp, fp->f_flag, fp->f_cred); 172 fd_abort(l->l_proc, fp, indx); 173 return error; 174 } 175 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 176 atomic_or_uint(&fp->f_flag, FHASLOCK); 177 } 178 if (flags & O_CLOEXEC) 179 fd_set_exclose(l, indx, true); 180 return 0; 181 } 182 183 static int 184 mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 185 void *data, size_t *data_len) 186 { 187 struct mount *mp; 188 int error = 0, saved_flags; 189 190 mp = vp->v_mount; 191 saved_flags = mp->mnt_flag; 192 193 /* We can operate only on VV_ROOT nodes. */ 194 if ((vp->v_vflag & VV_ROOT) == 0) { 195 error = EINVAL; 196 goto out; 197 } 198 199 /* 200 * We only allow the filesystem to be reloaded if it 201 * is currently mounted read-only. Additionally, we 202 * prevent read-write to read-only downgrades. 203 */ 204 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 205 (mp->mnt_flag & MNT_RDONLY) == 0 && 206 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 207 error = EOPNOTSUPP; /* Needs translation */ 208 goto out; 209 } 210 211 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 212 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 213 if (error) 214 goto out; 215 216 if (vfs_busy(mp, NULL)) { 217 error = EPERM; 218 goto out; 219 } 220 221 mutex_enter(&mp->mnt_updating); 222 223 mp->mnt_flag &= ~MNT_OP_FLAGS; 224 mp->mnt_flag |= flags & MNT_OP_FLAGS; 225 226 /* 227 * Set the mount level flags. 228 */ 229 if (flags & MNT_RDONLY) 230 mp->mnt_flag |= MNT_RDONLY; 231 else if (mp->mnt_flag & MNT_RDONLY) 232 mp->mnt_iflag |= IMNT_WANTRDWR; 233 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 234 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 235 error = VFS_MOUNT(mp, path, data, data_len); 236 237 if (error && data != NULL) { 238 int error2; 239 240 /* 241 * Update failed; let's try and see if it was an 242 * export request. For compat with 3.0 and earlier. 243 */ 244 error2 = vfs_hooks_reexport(mp, path, data); 245 246 /* 247 * Only update error code if the export request was 248 * understood but some problem occurred while 249 * processing it. 250 */ 251 if (error2 != EJUSTRETURN) 252 error = error2; 253 } 254 255 if (mp->mnt_iflag & IMNT_WANTRDWR) 256 mp->mnt_flag &= ~MNT_RDONLY; 257 if (error) 258 mp->mnt_flag = saved_flags; 259 mp->mnt_flag &= ~MNT_OP_FLAGS; 260 mp->mnt_iflag &= ~IMNT_WANTRDWR; 261 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 262 if (mp->mnt_syncer == NULL) 263 error = vfs_allocate_syncvnode(mp); 264 } else { 265 if (mp->mnt_syncer != NULL) 266 vfs_deallocate_syncvnode(mp); 267 } 268 mutex_exit(&mp->mnt_updating); 269 vfs_unbusy(mp, false, NULL); 270 271 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 272 (flags & MNT_EXTATTR)) { 273 if (VFS_EXTATTRCTL(vp->v_mount, EXTATTR_CMD_START, 274 NULL, 0, NULL) != 0) { 275 printf("%s: failed to start extattr, error = %d", 276 vp->v_mount->mnt_stat.f_mntonname, error); 277 mp->mnt_flag &= ~MNT_EXTATTR; 278 } 279 } 280 281 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 282 !(flags & MNT_EXTATTR)) { 283 if (VFS_EXTATTRCTL(vp->v_mount, EXTATTR_CMD_STOP, 284 NULL, 0, NULL) != 0) { 285 printf("%s: failed to stop extattr, error = %d", 286 vp->v_mount->mnt_stat.f_mntonname, error); 287 mp->mnt_flag |= MNT_RDONLY; 288 } 289 } 290 out: 291 return (error); 292 } 293 294 static int 295 mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 296 { 297 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 298 int error; 299 300 /* Copy file-system type from userspace. */ 301 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 302 if (error) { 303 /* 304 * Historically, filesystem types were identified by numbers. 305 * If we get an integer for the filesystem type instead of a 306 * string, we check to see if it matches one of the historic 307 * filesystem types. 308 */ 309 u_long fsindex = (u_long)fstype; 310 if (fsindex >= nmountcompatnames || 311 mountcompatnames[fsindex] == NULL) 312 return ENODEV; 313 strlcpy(fstypename, mountcompatnames[fsindex], 314 sizeof(fstypename)); 315 } 316 317 /* Accept `ufs' as an alias for `ffs', for compatibility. */ 318 if (strcmp(fstypename, "ufs") == 0) 319 fstypename[0] = 'f'; 320 321 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 322 return 0; 323 324 /* If we can autoload a vfs module, try again */ 325 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 326 327 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 328 return 0; 329 330 return ENODEV; 331 } 332 333 static int 334 mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 335 void *data, size_t *data_len) 336 { 337 struct mount *mp; 338 int error; 339 340 /* If MNT_GETARGS is specified, it should be the only flag. */ 341 if (flags & ~MNT_GETARGS) 342 return EINVAL; 343 344 mp = vp->v_mount; 345 346 /* XXX: probably some notion of "can see" here if we want isolation. */ 347 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 348 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 349 if (error) 350 return error; 351 352 if ((vp->v_vflag & VV_ROOT) == 0) 353 return EINVAL; 354 355 if (vfs_busy(mp, NULL)) 356 return EPERM; 357 358 mutex_enter(&mp->mnt_updating); 359 mp->mnt_flag &= ~MNT_OP_FLAGS; 360 mp->mnt_flag |= MNT_GETARGS; 361 error = VFS_MOUNT(mp, path, data, data_len); 362 mp->mnt_flag &= ~MNT_OP_FLAGS; 363 mutex_exit(&mp->mnt_updating); 364 365 vfs_unbusy(mp, false, NULL); 366 return (error); 367 } 368 369 int 370 sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 371 { 372 /* { 373 syscallarg(const char *) type; 374 syscallarg(const char *) path; 375 syscallarg(int) flags; 376 syscallarg(void *) data; 377 syscallarg(size_t) data_len; 378 } */ 379 380 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 381 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 382 SCARG(uap, data_len), retval); 383 } 384 385 int 386 do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 387 const char *path, int flags, void *data, enum uio_seg data_seg, 388 size_t data_len, register_t *retval) 389 { 390 struct vnode *vp; 391 void *data_buf = data; 392 bool vfsopsrele = false; 393 int error; 394 395 /* XXX: The calling convention of this routine is totally bizarre */ 396 if (vfsops) 397 vfsopsrele = true; 398 399 /* 400 * Get vnode to be covered 401 */ 402 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 403 if (error != 0) { 404 vp = NULL; 405 goto done; 406 } 407 408 if (vfsops == NULL) { 409 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 410 vfsops = vp->v_mount->mnt_op; 411 } else { 412 /* 'type' is userspace */ 413 error = mount_get_vfsops(type, &vfsops); 414 if (error != 0) 415 goto done; 416 vfsopsrele = true; 417 } 418 } 419 420 if (data != NULL && data_seg == UIO_USERSPACE) { 421 if (data_len == 0) { 422 /* No length supplied, use default for filesystem */ 423 data_len = vfsops->vfs_min_mount_data; 424 if (data_len > VFS_MAX_MOUNT_DATA) { 425 error = EINVAL; 426 goto done; 427 } 428 /* 429 * Hopefully a longer buffer won't make copyin() fail. 430 * For compatibility with 3.0 and earlier. 431 */ 432 if (flags & MNT_UPDATE 433 && data_len < sizeof (struct mnt_export_args30)) 434 data_len = sizeof (struct mnt_export_args30); 435 } 436 data_buf = kmem_alloc(data_len, KM_SLEEP); 437 438 /* NFS needs the buffer even for mnt_getargs .... */ 439 error = copyin(data, data_buf, data_len); 440 if (error != 0) 441 goto done; 442 } 443 444 if (flags & MNT_GETARGS) { 445 if (data_len == 0) { 446 error = EINVAL; 447 goto done; 448 } 449 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 450 if (error != 0) 451 goto done; 452 if (data_seg == UIO_USERSPACE) 453 error = copyout(data_buf, data, data_len); 454 *retval = data_len; 455 } else if (flags & MNT_UPDATE) { 456 error = mount_update(l, vp, path, flags, data_buf, &data_len); 457 } else { 458 /* Locking is handled internally in mount_domount(). */ 459 KASSERT(vfsopsrele == true); 460 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 461 &data_len); 462 vfsopsrele = false; 463 464 if ((error == 0) && (flags & MNT_EXTATTR)) { 465 if (VFS_EXTATTRCTL(vp->v_mount, EXTATTR_CMD_START, 466 NULL, 0, NULL) != 0) 467 printf("%s: failed to start extattr", 468 vp->v_mount->mnt_stat.f_mntonname); 469 /* XXX remove flag */ 470 } 471 } 472 473 done: 474 if (vfsopsrele) 475 vfs_delref(vfsops); 476 if (vp != NULL) { 477 vrele(vp); 478 } 479 if (data_buf != data) 480 kmem_free(data_buf, data_len); 481 return (error); 482 } 483 484 /* 485 * Unmount a file system. 486 * 487 * Note: unmount takes a path to the vnode mounted on as argument, 488 * not special file (as before). 489 */ 490 /* ARGSUSED */ 491 int 492 sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 493 { 494 /* { 495 syscallarg(const char *) path; 496 syscallarg(int) flags; 497 } */ 498 struct vnode *vp; 499 struct mount *mp; 500 int error; 501 struct pathbuf *pb; 502 struct nameidata nd; 503 504 error = pathbuf_copyin(SCARG(uap, path), &pb); 505 if (error) { 506 return error; 507 } 508 509 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 510 if ((error = namei(&nd)) != 0) { 511 pathbuf_destroy(pb); 512 return error; 513 } 514 vp = nd.ni_vp; 515 pathbuf_destroy(pb); 516 517 mp = vp->v_mount; 518 atomic_inc_uint(&mp->mnt_refcnt); 519 VOP_UNLOCK(vp); 520 521 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 522 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 523 if (error) { 524 vrele(vp); 525 vfs_destroy(mp); 526 return (error); 527 } 528 529 /* 530 * Don't allow unmounting the root file system. 531 */ 532 if (mp->mnt_flag & MNT_ROOTFS) { 533 vrele(vp); 534 vfs_destroy(mp); 535 return (EINVAL); 536 } 537 538 /* 539 * Must be the root of the filesystem 540 */ 541 if ((vp->v_vflag & VV_ROOT) == 0) { 542 vrele(vp); 543 vfs_destroy(mp); 544 return (EINVAL); 545 } 546 547 vrele(vp); 548 error = dounmount(mp, SCARG(uap, flags), l); 549 vfs_destroy(mp); 550 return error; 551 } 552 553 /* 554 * Sync each mounted filesystem. 555 */ 556 #ifdef DEBUG 557 int syncprt = 0; 558 struct ctldebug debug0 = { "syncprt", &syncprt }; 559 #endif 560 561 void 562 do_sys_sync(struct lwp *l) 563 { 564 struct mount *mp, *nmp; 565 int asyncflag; 566 567 mutex_enter(&mountlist_lock); 568 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 569 mp = nmp) { 570 if (vfs_busy(mp, &nmp)) { 571 continue; 572 } 573 mutex_enter(&mp->mnt_updating); 574 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 575 asyncflag = mp->mnt_flag & MNT_ASYNC; 576 mp->mnt_flag &= ~MNT_ASYNC; 577 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 578 if (asyncflag) 579 mp->mnt_flag |= MNT_ASYNC; 580 } 581 mutex_exit(&mp->mnt_updating); 582 vfs_unbusy(mp, false, &nmp); 583 } 584 mutex_exit(&mountlist_lock); 585 #ifdef DEBUG 586 if (syncprt) 587 vfs_bufstats(); 588 #endif /* DEBUG */ 589 } 590 591 /* ARGSUSED */ 592 int 593 sys_sync(struct lwp *l, const void *v, register_t *retval) 594 { 595 do_sys_sync(l); 596 return (0); 597 } 598 599 600 /* 601 * Change filesystem quotas. 602 */ 603 /* ARGSUSED */ 604 int 605 sys___quotactl50(struct lwp *l, const struct sys___quotactl50_args *uap, 606 register_t *retval) 607 { 608 /* { 609 syscallarg(const char *) path; 610 syscallarg(struct plistref *) pref; 611 } */ 612 struct mount *mp; 613 int error; 614 struct vnode *vp; 615 prop_dictionary_t dict; 616 struct plistref pref; 617 618 error = namei_simple_user(SCARG(uap, path), 619 NSM_FOLLOW_TRYEMULROOT, &vp); 620 if (error != 0) 621 return (error); 622 mp = vp->v_mount; 623 error = copyin(SCARG(uap, pref), &pref, sizeof(pref)); 624 if (error) 625 return error; 626 error = prop_dictionary_copyin(&pref, &dict); 627 if (error) 628 return error; 629 error = VFS_QUOTACTL(mp, dict); 630 vrele(vp); 631 if (!error) 632 error = prop_dictionary_copyout(&pref, dict); 633 if (!error) 634 error = copyout(&pref, SCARG(uap, pref), sizeof(pref)); 635 prop_object_release(dict); 636 return (error); 637 } 638 639 int 640 dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 641 int root) 642 { 643 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 644 int error = 0; 645 646 /* 647 * If MNT_NOWAIT or MNT_LAZY is specified, do not 648 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 649 * overrides MNT_NOWAIT. 650 */ 651 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 652 (flags != MNT_WAIT && flags != 0)) { 653 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 654 goto done; 655 } 656 657 /* Get the filesystem stats now */ 658 memset(sp, 0, sizeof(*sp)); 659 if ((error = VFS_STATVFS(mp, sp)) != 0) { 660 return error; 661 } 662 663 if (cwdi->cwdi_rdir == NULL) 664 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 665 done: 666 if (cwdi->cwdi_rdir != NULL) { 667 size_t len; 668 char *bp; 669 char c; 670 char *path = PNBUF_GET(); 671 672 bp = path + MAXPATHLEN; 673 *--bp = '\0'; 674 rw_enter(&cwdi->cwdi_lock, RW_READER); 675 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 676 MAXPATHLEN / 2, 0, l); 677 rw_exit(&cwdi->cwdi_lock); 678 if (error) { 679 PNBUF_PUT(path); 680 return error; 681 } 682 len = strlen(bp); 683 if (len != 1) { 684 /* 685 * for mount points that are below our root, we can see 686 * them, so we fix up the pathname and return them. The 687 * rest we cannot see, so we don't allow viewing the 688 * data. 689 */ 690 if (strncmp(bp, sp->f_mntonname, len) == 0 && 691 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 692 (void)strlcpy(sp->f_mntonname, 693 c == '\0' ? "/" : &sp->f_mntonname[len], 694 sizeof(sp->f_mntonname)); 695 } else { 696 if (root) 697 (void)strlcpy(sp->f_mntonname, "/", 698 sizeof(sp->f_mntonname)); 699 else 700 error = EPERM; 701 } 702 } 703 PNBUF_PUT(path); 704 } 705 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 706 return error; 707 } 708 709 /* 710 * Get filesystem statistics by path. 711 */ 712 int 713 do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 714 { 715 struct mount *mp; 716 int error; 717 struct vnode *vp; 718 719 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 720 if (error != 0) 721 return error; 722 mp = vp->v_mount; 723 error = dostatvfs(mp, sb, l, flags, 1); 724 vrele(vp); 725 return error; 726 } 727 728 /* ARGSUSED */ 729 int 730 sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 731 { 732 /* { 733 syscallarg(const char *) path; 734 syscallarg(struct statvfs *) buf; 735 syscallarg(int) flags; 736 } */ 737 struct statvfs *sb; 738 int error; 739 740 sb = STATVFSBUF_GET(); 741 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 742 if (error == 0) 743 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 744 STATVFSBUF_PUT(sb); 745 return error; 746 } 747 748 /* 749 * Get filesystem statistics by fd. 750 */ 751 int 752 do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 753 { 754 file_t *fp; 755 struct mount *mp; 756 int error; 757 758 /* fd_getvnode() will use the descriptor for us */ 759 if ((error = fd_getvnode(fd, &fp)) != 0) 760 return (error); 761 mp = ((struct vnode *)fp->f_data)->v_mount; 762 error = dostatvfs(mp, sb, curlwp, flags, 1); 763 fd_putfile(fd); 764 return error; 765 } 766 767 /* ARGSUSED */ 768 int 769 sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 770 { 771 /* { 772 syscallarg(int) fd; 773 syscallarg(struct statvfs *) buf; 774 syscallarg(int) flags; 775 } */ 776 struct statvfs *sb; 777 int error; 778 779 sb = STATVFSBUF_GET(); 780 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 781 if (error == 0) 782 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 783 STATVFSBUF_PUT(sb); 784 return error; 785 } 786 787 788 /* 789 * Get statistics on all filesystems. 790 */ 791 int 792 do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 793 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 794 register_t *retval) 795 { 796 int root = 0; 797 struct proc *p = l->l_proc; 798 struct mount *mp, *nmp; 799 struct statvfs *sb; 800 size_t count, maxcount; 801 int error = 0; 802 803 sb = STATVFSBUF_GET(); 804 maxcount = bufsize / entry_sz; 805 mutex_enter(&mountlist_lock); 806 count = 0; 807 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 808 mp = nmp) { 809 if (vfs_busy(mp, &nmp)) { 810 continue; 811 } 812 if (sfsp && count < maxcount) { 813 error = dostatvfs(mp, sb, l, flags, 0); 814 if (error) { 815 vfs_unbusy(mp, false, &nmp); 816 error = 0; 817 continue; 818 } 819 error = copyfn(sb, sfsp, entry_sz); 820 if (error) { 821 vfs_unbusy(mp, false, NULL); 822 goto out; 823 } 824 sfsp = (char *)sfsp + entry_sz; 825 root |= strcmp(sb->f_mntonname, "/") == 0; 826 } 827 count++; 828 vfs_unbusy(mp, false, &nmp); 829 } 830 mutex_exit(&mountlist_lock); 831 832 if (root == 0 && p->p_cwdi->cwdi_rdir) { 833 /* 834 * fake a root entry 835 */ 836 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 837 sb, l, flags, 1); 838 if (error != 0) 839 goto out; 840 if (sfsp) { 841 error = copyfn(sb, sfsp, entry_sz); 842 if (error != 0) 843 goto out; 844 } 845 count++; 846 } 847 if (sfsp && count > maxcount) 848 *retval = maxcount; 849 else 850 *retval = count; 851 out: 852 STATVFSBUF_PUT(sb); 853 return error; 854 } 855 856 int 857 sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 858 { 859 /* { 860 syscallarg(struct statvfs *) buf; 861 syscallarg(size_t) bufsize; 862 syscallarg(int) flags; 863 } */ 864 865 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 866 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 867 } 868 869 /* 870 * Change current working directory to a given file descriptor. 871 */ 872 /* ARGSUSED */ 873 int 874 sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 875 { 876 /* { 877 syscallarg(int) fd; 878 } */ 879 struct proc *p = l->l_proc; 880 struct cwdinfo *cwdi; 881 struct vnode *vp, *tdp; 882 struct mount *mp; 883 file_t *fp; 884 int error, fd; 885 886 /* fd_getvnode() will use the descriptor for us */ 887 fd = SCARG(uap, fd); 888 if ((error = fd_getvnode(fd, &fp)) != 0) 889 return (error); 890 vp = fp->f_data; 891 892 vref(vp); 893 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 894 if (vp->v_type != VDIR) 895 error = ENOTDIR; 896 else 897 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 898 if (error) { 899 vput(vp); 900 goto out; 901 } 902 while ((mp = vp->v_mountedhere) != NULL) { 903 error = vfs_busy(mp, NULL); 904 vput(vp); 905 if (error != 0) 906 goto out; 907 error = VFS_ROOT(mp, &tdp); 908 vfs_unbusy(mp, false, NULL); 909 if (error) 910 goto out; 911 vp = tdp; 912 } 913 VOP_UNLOCK(vp); 914 915 /* 916 * Disallow changing to a directory not under the process's 917 * current root directory (if there is one). 918 */ 919 cwdi = p->p_cwdi; 920 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 921 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 922 vrele(vp); 923 error = EPERM; /* operation not permitted */ 924 } else { 925 vrele(cwdi->cwdi_cdir); 926 cwdi->cwdi_cdir = vp; 927 } 928 rw_exit(&cwdi->cwdi_lock); 929 930 out: 931 fd_putfile(fd); 932 return (error); 933 } 934 935 /* 936 * Change this process's notion of the root directory to a given file 937 * descriptor. 938 */ 939 int 940 sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 941 { 942 struct proc *p = l->l_proc; 943 struct vnode *vp; 944 file_t *fp; 945 int error, fd = SCARG(uap, fd); 946 947 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 948 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 949 return error; 950 /* fd_getvnode() will use the descriptor for us */ 951 if ((error = fd_getvnode(fd, &fp)) != 0) 952 return error; 953 vp = fp->f_data; 954 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 955 if (vp->v_type != VDIR) 956 error = ENOTDIR; 957 else 958 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 959 VOP_UNLOCK(vp); 960 if (error) 961 goto out; 962 vref(vp); 963 964 change_root(p->p_cwdi, vp, l); 965 966 out: 967 fd_putfile(fd); 968 return (error); 969 } 970 971 /* 972 * Change current working directory (``.''). 973 */ 974 /* ARGSUSED */ 975 int 976 sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 977 { 978 /* { 979 syscallarg(const char *) path; 980 } */ 981 struct proc *p = l->l_proc; 982 struct cwdinfo *cwdi; 983 int error; 984 struct vnode *vp; 985 986 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 987 &vp, l)) != 0) 988 return (error); 989 cwdi = p->p_cwdi; 990 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 991 vrele(cwdi->cwdi_cdir); 992 cwdi->cwdi_cdir = vp; 993 rw_exit(&cwdi->cwdi_lock); 994 return (0); 995 } 996 997 /* 998 * Change notion of root (``/'') directory. 999 */ 1000 /* ARGSUSED */ 1001 int 1002 sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1003 { 1004 /* { 1005 syscallarg(const char *) path; 1006 } */ 1007 struct proc *p = l->l_proc; 1008 int error; 1009 struct vnode *vp; 1010 1011 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1012 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1013 return (error); 1014 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1015 &vp, l)) != 0) 1016 return (error); 1017 1018 change_root(p->p_cwdi, vp, l); 1019 1020 return (0); 1021 } 1022 1023 /* 1024 * Common routine for chroot and fchroot. 1025 * NB: callers need to properly authorize the change root operation. 1026 */ 1027 void 1028 change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1029 { 1030 1031 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1032 if (cwdi->cwdi_rdir != NULL) 1033 vrele(cwdi->cwdi_rdir); 1034 cwdi->cwdi_rdir = vp; 1035 1036 /* 1037 * Prevent escaping from chroot by putting the root under 1038 * the working directory. Silently chdir to / if we aren't 1039 * already there. 1040 */ 1041 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1042 /* 1043 * XXX would be more failsafe to change directory to a 1044 * deadfs node here instead 1045 */ 1046 vrele(cwdi->cwdi_cdir); 1047 vref(vp); 1048 cwdi->cwdi_cdir = vp; 1049 } 1050 rw_exit(&cwdi->cwdi_lock); 1051 } 1052 1053 /* 1054 * Common routine for chroot and chdir. 1055 * XXX "where" should be enum uio_seg 1056 */ 1057 int 1058 chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1059 { 1060 struct pathbuf *pb; 1061 struct nameidata nd; 1062 int error; 1063 1064 error = pathbuf_maybe_copyin(path, where, &pb); 1065 if (error) { 1066 return error; 1067 } 1068 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1069 if ((error = namei(&nd)) != 0) { 1070 pathbuf_destroy(pb); 1071 return error; 1072 } 1073 *vpp = nd.ni_vp; 1074 pathbuf_destroy(pb); 1075 1076 if ((*vpp)->v_type != VDIR) 1077 error = ENOTDIR; 1078 else 1079 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1080 1081 if (error) 1082 vput(*vpp); 1083 else 1084 VOP_UNLOCK(*vpp); 1085 return (error); 1086 } 1087 1088 /* 1089 * Check permissions, allocate an open file structure, 1090 * and call the device open routine if any. 1091 */ 1092 int 1093 sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1094 { 1095 /* { 1096 syscallarg(const char *) path; 1097 syscallarg(int) flags; 1098 syscallarg(int) mode; 1099 } */ 1100 struct proc *p = l->l_proc; 1101 struct cwdinfo *cwdi = p->p_cwdi; 1102 file_t *fp; 1103 struct vnode *vp; 1104 int flags, cmode; 1105 int indx, error; 1106 struct pathbuf *pb; 1107 struct nameidata nd; 1108 1109 flags = FFLAGS(SCARG(uap, flags)); 1110 if ((flags & (FREAD | FWRITE)) == 0) 1111 return (EINVAL); 1112 1113 error = pathbuf_copyin(SCARG(uap, path), &pb); 1114 if (error) { 1115 return error; 1116 } 1117 1118 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1119 pathbuf_destroy(pb); 1120 return error; 1121 } 1122 /* We're going to read cwdi->cwdi_cmask unlocked here. */ 1123 cmode = ((SCARG(uap, mode) &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1124 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1125 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1126 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1127 fd_abort(p, fp, indx); 1128 if ((error == EDUPFD || error == EMOVEFD) && 1129 l->l_dupfd >= 0 && /* XXX from fdopen */ 1130 (error = 1131 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1132 *retval = indx; 1133 pathbuf_destroy(pb); 1134 return (0); 1135 } 1136 if (error == ERESTART) 1137 error = EINTR; 1138 pathbuf_destroy(pb); 1139 return (error); 1140 } 1141 1142 l->l_dupfd = 0; 1143 vp = nd.ni_vp; 1144 pathbuf_destroy(pb); 1145 1146 if ((error = open_setfp(l, fp, vp, indx, flags))) 1147 return error; 1148 1149 VOP_UNLOCK(vp); 1150 *retval = indx; 1151 fd_affix(p, fp, indx); 1152 return (0); 1153 } 1154 1155 int 1156 sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1157 { 1158 /* { 1159 syscallarg(int) fd; 1160 syscallarg(const char *) path; 1161 syscallarg(int) flags; 1162 syscallarg(int) mode; 1163 } */ 1164 1165 return ENOSYS; 1166 } 1167 1168 static void 1169 vfs__fhfree(fhandle_t *fhp) 1170 { 1171 size_t fhsize; 1172 1173 if (fhp == NULL) { 1174 return; 1175 } 1176 fhsize = FHANDLE_SIZE(fhp); 1177 kmem_free(fhp, fhsize); 1178 } 1179 1180 /* 1181 * vfs_composefh: compose a filehandle. 1182 */ 1183 1184 int 1185 vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1186 { 1187 struct mount *mp; 1188 struct fid *fidp; 1189 int error; 1190 size_t needfhsize; 1191 size_t fidsize; 1192 1193 mp = vp->v_mount; 1194 fidp = NULL; 1195 if (*fh_size < FHANDLE_SIZE_MIN) { 1196 fidsize = 0; 1197 } else { 1198 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1199 if (fhp != NULL) { 1200 memset(fhp, 0, *fh_size); 1201 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1202 fidp = &fhp->fh_fid; 1203 } 1204 } 1205 error = VFS_VPTOFH(vp, fidp, &fidsize); 1206 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1207 if (error == 0 && *fh_size < needfhsize) { 1208 error = E2BIG; 1209 } 1210 *fh_size = needfhsize; 1211 return error; 1212 } 1213 1214 int 1215 vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1216 { 1217 struct mount *mp; 1218 fhandle_t *fhp; 1219 size_t fhsize; 1220 size_t fidsize; 1221 int error; 1222 1223 *fhpp = NULL; 1224 mp = vp->v_mount; 1225 fidsize = 0; 1226 error = VFS_VPTOFH(vp, NULL, &fidsize); 1227 KASSERT(error != 0); 1228 if (error != E2BIG) { 1229 goto out; 1230 } 1231 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1232 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1233 if (fhp == NULL) { 1234 error = ENOMEM; 1235 goto out; 1236 } 1237 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1238 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1239 if (error == 0) { 1240 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1241 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1242 *fhpp = fhp; 1243 } else { 1244 kmem_free(fhp, fhsize); 1245 } 1246 out: 1247 return error; 1248 } 1249 1250 void 1251 vfs_composefh_free(fhandle_t *fhp) 1252 { 1253 1254 vfs__fhfree(fhp); 1255 } 1256 1257 /* 1258 * vfs_fhtovp: lookup a vnode by a filehandle. 1259 */ 1260 1261 int 1262 vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1263 { 1264 struct mount *mp; 1265 int error; 1266 1267 *vpp = NULL; 1268 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1269 if (mp == NULL) { 1270 error = ESTALE; 1271 goto out; 1272 } 1273 if (mp->mnt_op->vfs_fhtovp == NULL) { 1274 error = EOPNOTSUPP; 1275 goto out; 1276 } 1277 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1278 out: 1279 return error; 1280 } 1281 1282 /* 1283 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1284 * the needed size. 1285 */ 1286 1287 int 1288 vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1289 { 1290 fhandle_t *fhp; 1291 int error; 1292 1293 *fhpp = NULL; 1294 if (fhsize > FHANDLE_SIZE_MAX) { 1295 return EINVAL; 1296 } 1297 if (fhsize < FHANDLE_SIZE_MIN) { 1298 return EINVAL; 1299 } 1300 again: 1301 fhp = kmem_alloc(fhsize, KM_SLEEP); 1302 if (fhp == NULL) { 1303 return ENOMEM; 1304 } 1305 error = copyin(ufhp, fhp, fhsize); 1306 if (error == 0) { 1307 /* XXX this check shouldn't be here */ 1308 if (FHANDLE_SIZE(fhp) == fhsize) { 1309 *fhpp = fhp; 1310 return 0; 1311 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1312 /* 1313 * a kludge for nfsv2 padded handles. 1314 */ 1315 size_t sz; 1316 1317 sz = FHANDLE_SIZE(fhp); 1318 kmem_free(fhp, fhsize); 1319 fhsize = sz; 1320 goto again; 1321 } else { 1322 /* 1323 * userland told us wrong size. 1324 */ 1325 error = EINVAL; 1326 } 1327 } 1328 kmem_free(fhp, fhsize); 1329 return error; 1330 } 1331 1332 void 1333 vfs_copyinfh_free(fhandle_t *fhp) 1334 { 1335 1336 vfs__fhfree(fhp); 1337 } 1338 1339 /* 1340 * Get file handle system call 1341 */ 1342 int 1343 sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1344 { 1345 /* { 1346 syscallarg(char *) fname; 1347 syscallarg(fhandle_t *) fhp; 1348 syscallarg(size_t *) fh_size; 1349 } */ 1350 struct vnode *vp; 1351 fhandle_t *fh; 1352 int error; 1353 struct pathbuf *pb; 1354 struct nameidata nd; 1355 size_t sz; 1356 size_t usz; 1357 1358 /* 1359 * Must be super user 1360 */ 1361 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1362 0, NULL, NULL, NULL); 1363 if (error) 1364 return (error); 1365 1366 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1367 if (error) { 1368 return error; 1369 } 1370 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1371 error = namei(&nd); 1372 if (error) { 1373 pathbuf_destroy(pb); 1374 return error; 1375 } 1376 vp = nd.ni_vp; 1377 pathbuf_destroy(pb); 1378 1379 error = vfs_composefh_alloc(vp, &fh); 1380 vput(vp); 1381 if (error != 0) { 1382 goto out; 1383 } 1384 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1385 if (error != 0) { 1386 goto out; 1387 } 1388 sz = FHANDLE_SIZE(fh); 1389 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1390 if (error != 0) { 1391 goto out; 1392 } 1393 if (usz >= sz) { 1394 error = copyout(fh, SCARG(uap, fhp), sz); 1395 } else { 1396 error = E2BIG; 1397 } 1398 out: 1399 vfs_composefh_free(fh); 1400 return (error); 1401 } 1402 1403 /* 1404 * Open a file given a file handle. 1405 * 1406 * Check permissions, allocate an open file structure, 1407 * and call the device open routine if any. 1408 */ 1409 1410 int 1411 dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1412 register_t *retval) 1413 { 1414 file_t *fp; 1415 struct vnode *vp = NULL; 1416 kauth_cred_t cred = l->l_cred; 1417 file_t *nfp; 1418 int indx, error = 0; 1419 struct vattr va; 1420 fhandle_t *fh; 1421 int flags; 1422 proc_t *p; 1423 1424 p = curproc; 1425 1426 /* 1427 * Must be super user 1428 */ 1429 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1430 0, NULL, NULL, NULL))) 1431 return (error); 1432 1433 flags = FFLAGS(oflags); 1434 if ((flags & (FREAD | FWRITE)) == 0) 1435 return (EINVAL); 1436 if ((flags & O_CREAT)) 1437 return (EINVAL); 1438 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1439 return (error); 1440 fp = nfp; 1441 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1442 if (error != 0) { 1443 goto bad; 1444 } 1445 error = vfs_fhtovp(fh, &vp); 1446 if (error != 0) { 1447 goto bad; 1448 } 1449 1450 /* Now do an effective vn_open */ 1451 1452 if (vp->v_type == VSOCK) { 1453 error = EOPNOTSUPP; 1454 goto bad; 1455 } 1456 error = vn_openchk(vp, cred, flags); 1457 if (error != 0) 1458 goto bad; 1459 if (flags & O_TRUNC) { 1460 VOP_UNLOCK(vp); /* XXX */ 1461 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1462 vattr_null(&va); 1463 va.va_size = 0; 1464 error = VOP_SETATTR(vp, &va, cred); 1465 if (error) 1466 goto bad; 1467 } 1468 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1469 goto bad; 1470 if (flags & FWRITE) { 1471 mutex_enter(vp->v_interlock); 1472 vp->v_writecount++; 1473 mutex_exit(vp->v_interlock); 1474 } 1475 1476 /* done with modified vn_open, now finish what sys_open does. */ 1477 if ((error = open_setfp(l, fp, vp, indx, flags))) 1478 return error; 1479 1480 VOP_UNLOCK(vp); 1481 *retval = indx; 1482 fd_affix(p, fp, indx); 1483 vfs_copyinfh_free(fh); 1484 return (0); 1485 1486 bad: 1487 fd_abort(p, fp, indx); 1488 if (vp != NULL) 1489 vput(vp); 1490 vfs_copyinfh_free(fh); 1491 return (error); 1492 } 1493 1494 int 1495 sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1496 { 1497 /* { 1498 syscallarg(const void *) fhp; 1499 syscallarg(size_t) fh_size; 1500 syscallarg(int) flags; 1501 } */ 1502 1503 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1504 SCARG(uap, flags), retval); 1505 } 1506 1507 int 1508 do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1509 { 1510 int error; 1511 fhandle_t *fh; 1512 struct vnode *vp; 1513 1514 /* 1515 * Must be super user 1516 */ 1517 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1518 0, NULL, NULL, NULL))) 1519 return (error); 1520 1521 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1522 if (error != 0) 1523 return error; 1524 1525 error = vfs_fhtovp(fh, &vp); 1526 vfs_copyinfh_free(fh); 1527 if (error != 0) 1528 return error; 1529 1530 error = vn_stat(vp, sb); 1531 vput(vp); 1532 return error; 1533 } 1534 1535 1536 /* ARGSUSED */ 1537 int 1538 sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1539 { 1540 /* { 1541 syscallarg(const void *) fhp; 1542 syscallarg(size_t) fh_size; 1543 syscallarg(struct stat *) sb; 1544 } */ 1545 struct stat sb; 1546 int error; 1547 1548 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1549 if (error) 1550 return error; 1551 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1552 } 1553 1554 int 1555 do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1556 int flags) 1557 { 1558 fhandle_t *fh; 1559 struct mount *mp; 1560 struct vnode *vp; 1561 int error; 1562 1563 /* 1564 * Must be super user 1565 */ 1566 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1567 0, NULL, NULL, NULL))) 1568 return error; 1569 1570 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1571 if (error != 0) 1572 return error; 1573 1574 error = vfs_fhtovp(fh, &vp); 1575 vfs_copyinfh_free(fh); 1576 if (error != 0) 1577 return error; 1578 1579 mp = vp->v_mount; 1580 error = dostatvfs(mp, sb, l, flags, 1); 1581 vput(vp); 1582 return error; 1583 } 1584 1585 /* ARGSUSED */ 1586 int 1587 sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 1588 { 1589 /* { 1590 syscallarg(const void *) fhp; 1591 syscallarg(size_t) fh_size; 1592 syscallarg(struct statvfs *) buf; 1593 syscallarg(int) flags; 1594 } */ 1595 struct statvfs *sb = STATVFSBUF_GET(); 1596 int error; 1597 1598 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 1599 SCARG(uap, flags)); 1600 if (error == 0) 1601 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1602 STATVFSBUF_PUT(sb); 1603 return error; 1604 } 1605 1606 /* 1607 * Create a special file. 1608 */ 1609 /* ARGSUSED */ 1610 int 1611 sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 1612 register_t *retval) 1613 { 1614 /* { 1615 syscallarg(const char *) path; 1616 syscallarg(mode_t) mode; 1617 syscallarg(dev_t) dev; 1618 } */ 1619 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 1620 SCARG(uap, dev), retval, UIO_USERSPACE); 1621 } 1622 1623 int 1624 sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 1625 register_t *retval) 1626 { 1627 /* { 1628 syscallarg(int) fd; 1629 syscallarg(const char *) path; 1630 syscallarg(mode_t) mode; 1631 syscallarg(uint32_t) dev; 1632 } */ 1633 1634 return ENOSYS; 1635 } 1636 1637 int 1638 do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 1639 register_t *retval, enum uio_seg seg) 1640 { 1641 struct proc *p = l->l_proc; 1642 struct vnode *vp; 1643 struct vattr vattr; 1644 int error, optype; 1645 struct pathbuf *pb; 1646 struct nameidata nd; 1647 const char *pathstring; 1648 1649 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 1650 0, NULL, NULL, NULL)) != 0) 1651 return (error); 1652 1653 optype = VOP_MKNOD_DESCOFFSET; 1654 1655 error = pathbuf_maybe_copyin(pathname, seg, &pb); 1656 if (error) { 1657 return error; 1658 } 1659 pathstring = pathbuf_stringcopy_get(pb); 1660 if (pathstring == NULL) { 1661 pathbuf_destroy(pb); 1662 return ENOMEM; 1663 } 1664 1665 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 1666 if ((error = namei(&nd)) != 0) 1667 goto out; 1668 vp = nd.ni_vp; 1669 1670 if (vp != NULL) 1671 error = EEXIST; 1672 else { 1673 vattr_null(&vattr); 1674 /* We will read cwdi->cwdi_cmask unlocked. */ 1675 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1676 vattr.va_rdev = dev; 1677 1678 switch (mode & S_IFMT) { 1679 case S_IFMT: /* used by badsect to flag bad sectors */ 1680 vattr.va_type = VBAD; 1681 break; 1682 case S_IFCHR: 1683 vattr.va_type = VCHR; 1684 break; 1685 case S_IFBLK: 1686 vattr.va_type = VBLK; 1687 break; 1688 case S_IFWHT: 1689 optype = VOP_WHITEOUT_DESCOFFSET; 1690 break; 1691 case S_IFREG: 1692 #if NVERIEXEC > 0 1693 error = veriexec_openchk(l, nd.ni_vp, pathstring, 1694 O_CREAT); 1695 #endif /* NVERIEXEC > 0 */ 1696 vattr.va_type = VREG; 1697 vattr.va_rdev = VNOVAL; 1698 optype = VOP_CREATE_DESCOFFSET; 1699 break; 1700 default: 1701 error = EINVAL; 1702 break; 1703 } 1704 } 1705 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 1706 && vattr.va_rdev == VNOVAL) 1707 error = EINVAL; 1708 if (!error) { 1709 switch (optype) { 1710 case VOP_WHITEOUT_DESCOFFSET: 1711 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1712 if (error) 1713 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1714 vput(nd.ni_dvp); 1715 break; 1716 1717 case VOP_MKNOD_DESCOFFSET: 1718 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1719 &nd.ni_cnd, &vattr); 1720 if (error == 0) 1721 vput(nd.ni_vp); 1722 break; 1723 1724 case VOP_CREATE_DESCOFFSET: 1725 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 1726 &nd.ni_cnd, &vattr); 1727 if (error == 0) 1728 vput(nd.ni_vp); 1729 break; 1730 } 1731 } else { 1732 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1733 if (nd.ni_dvp == vp) 1734 vrele(nd.ni_dvp); 1735 else 1736 vput(nd.ni_dvp); 1737 if (vp) 1738 vrele(vp); 1739 } 1740 out: 1741 pathbuf_stringcopy_put(pb, pathstring); 1742 pathbuf_destroy(pb); 1743 return (error); 1744 } 1745 1746 /* 1747 * Create a named pipe. 1748 */ 1749 /* ARGSUSED */ 1750 int 1751 sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 1752 { 1753 /* { 1754 syscallarg(const char *) path; 1755 syscallarg(int) mode; 1756 } */ 1757 struct proc *p = l->l_proc; 1758 struct vattr vattr; 1759 int error; 1760 struct pathbuf *pb; 1761 struct nameidata nd; 1762 1763 error = pathbuf_copyin(SCARG(uap, path), &pb); 1764 if (error) { 1765 return error; 1766 } 1767 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 1768 if ((error = namei(&nd)) != 0) { 1769 pathbuf_destroy(pb); 1770 return error; 1771 } 1772 if (nd.ni_vp != NULL) { 1773 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1774 if (nd.ni_dvp == nd.ni_vp) 1775 vrele(nd.ni_dvp); 1776 else 1777 vput(nd.ni_dvp); 1778 vrele(nd.ni_vp); 1779 pathbuf_destroy(pb); 1780 return (EEXIST); 1781 } 1782 vattr_null(&vattr); 1783 vattr.va_type = VFIFO; 1784 /* We will read cwdi->cwdi_cmask unlocked. */ 1785 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 1786 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1787 if (error == 0) 1788 vput(nd.ni_vp); 1789 pathbuf_destroy(pb); 1790 return (error); 1791 } 1792 1793 int 1794 sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 1795 register_t *retval) 1796 { 1797 /* { 1798 syscallarg(int) fd; 1799 syscallarg(const char *) path; 1800 syscallarg(int) mode; 1801 } */ 1802 1803 return ENOSYS; 1804 } 1805 /* 1806 * Make a hard file link. 1807 */ 1808 /* ARGSUSED */ 1809 static int 1810 do_sys_link(struct lwp *l, const char *path, const char *link, 1811 int follow, register_t *retval) 1812 { 1813 struct vnode *vp; 1814 struct pathbuf *linkpb; 1815 struct nameidata nd; 1816 namei_simple_flags_t namei_simple_flags; 1817 int error; 1818 1819 if (follow) 1820 namei_simple_flags = NSM_FOLLOW_TRYEMULROOT; 1821 else 1822 namei_simple_flags = NSM_NOFOLLOW_TRYEMULROOT; 1823 1824 error = namei_simple_user(path, namei_simple_flags, &vp); 1825 if (error != 0) 1826 return (error); 1827 error = pathbuf_copyin(link, &linkpb); 1828 if (error) { 1829 goto out1; 1830 } 1831 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 1832 if ((error = namei(&nd)) != 0) 1833 goto out2; 1834 if (nd.ni_vp) { 1835 error = EEXIST; 1836 goto abortop; 1837 } 1838 /* Prevent hard links on directories. */ 1839 if (vp->v_type == VDIR) { 1840 error = EPERM; 1841 goto abortop; 1842 } 1843 /* Prevent cross-mount operation. */ 1844 if (nd.ni_dvp->v_mount != vp->v_mount) { 1845 error = EXDEV; 1846 goto abortop; 1847 } 1848 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1849 out2: 1850 pathbuf_destroy(linkpb); 1851 out1: 1852 vrele(vp); 1853 return (error); 1854 abortop: 1855 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1856 if (nd.ni_dvp == nd.ni_vp) 1857 vrele(nd.ni_dvp); 1858 else 1859 vput(nd.ni_dvp); 1860 if (nd.ni_vp != NULL) 1861 vrele(nd.ni_vp); 1862 goto out2; 1863 } 1864 1865 int 1866 sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 1867 { 1868 /* { 1869 syscallarg(const char *) path; 1870 syscallarg(const char *) link; 1871 } */ 1872 const char *path = SCARG(uap, path); 1873 const char *link = SCARG(uap, link); 1874 1875 return do_sys_link(l, path, link, 1, retval); 1876 } 1877 1878 int 1879 sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 1880 register_t *retval) 1881 { 1882 /* { 1883 syscallarg(int) fd1; 1884 syscallarg(const char *) name1; 1885 syscallarg(int) fd2; 1886 syscallarg(const char *) name2; 1887 syscallarg(int) flags; 1888 } */ 1889 const char *name1 = SCARG(uap, name1); 1890 const char *name2 = SCARG(uap, name2); 1891 int follow; 1892 1893 /* 1894 * Specified fd1 and fd2 are not yet implemented 1895 */ 1896 if ((SCARG(uap, fd1) != AT_FDCWD) || (SCARG(uap, fd2) != AT_FDCWD)) 1897 return ENOSYS; 1898 1899 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 1900 1901 return do_sys_link(l, name1, name2, follow, retval); 1902 } 1903 1904 1905 int 1906 do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 1907 { 1908 struct proc *p = curproc; 1909 struct vattr vattr; 1910 char *path; 1911 int error; 1912 struct pathbuf *linkpb; 1913 struct nameidata nd; 1914 1915 path = PNBUF_GET(); 1916 if (seg == UIO_USERSPACE) { 1917 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 1918 goto out1; 1919 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 1920 goto out1; 1921 } else { 1922 KASSERT(strlen(patharg) < MAXPATHLEN); 1923 strcpy(path, patharg); 1924 linkpb = pathbuf_create(link); 1925 if (linkpb == NULL) { 1926 error = ENOMEM; 1927 goto out1; 1928 } 1929 } 1930 ktrkuser("symlink-target", path, strlen(path)); 1931 1932 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 1933 if ((error = namei(&nd)) != 0) 1934 goto out2; 1935 if (nd.ni_vp) { 1936 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 1937 if (nd.ni_dvp == nd.ni_vp) 1938 vrele(nd.ni_dvp); 1939 else 1940 vput(nd.ni_dvp); 1941 vrele(nd.ni_vp); 1942 error = EEXIST; 1943 goto out2; 1944 } 1945 vattr_null(&vattr); 1946 vattr.va_type = VLNK; 1947 /* We will read cwdi->cwdi_cmask unlocked. */ 1948 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 1949 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 1950 if (error == 0) 1951 vput(nd.ni_vp); 1952 out2: 1953 pathbuf_destroy(linkpb); 1954 out1: 1955 PNBUF_PUT(path); 1956 return (error); 1957 } 1958 1959 /* 1960 * Make a symbolic link. 1961 */ 1962 /* ARGSUSED */ 1963 int 1964 sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 1965 { 1966 /* { 1967 syscallarg(const char *) path; 1968 syscallarg(const char *) link; 1969 } */ 1970 1971 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link), 1972 UIO_USERSPACE); 1973 } 1974 1975 int 1976 sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 1977 register_t *retval) 1978 { 1979 /* { 1980 syscallarg(int) fd; 1981 syscallarg(const char *) path; 1982 syscallarg(const char *) link; 1983 } */ 1984 1985 return ENOSYS; 1986 } 1987 1988 /* 1989 * Delete a whiteout from the filesystem. 1990 */ 1991 /* ARGSUSED */ 1992 int 1993 sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 1994 { 1995 /* { 1996 syscallarg(const char *) path; 1997 } */ 1998 int error; 1999 struct pathbuf *pb; 2000 struct nameidata nd; 2001 2002 error = pathbuf_copyin(SCARG(uap, path), &pb); 2003 if (error) { 2004 return error; 2005 } 2006 2007 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2008 error = namei(&nd); 2009 if (error) { 2010 pathbuf_destroy(pb); 2011 return (error); 2012 } 2013 2014 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2015 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2016 if (nd.ni_dvp == nd.ni_vp) 2017 vrele(nd.ni_dvp); 2018 else 2019 vput(nd.ni_dvp); 2020 if (nd.ni_vp) 2021 vrele(nd.ni_vp); 2022 pathbuf_destroy(pb); 2023 return (EEXIST); 2024 } 2025 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2026 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2027 vput(nd.ni_dvp); 2028 pathbuf_destroy(pb); 2029 return (error); 2030 } 2031 2032 /* 2033 * Delete a name from the filesystem. 2034 */ 2035 /* ARGSUSED */ 2036 int 2037 sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2038 { 2039 /* { 2040 syscallarg(const char *) path; 2041 } */ 2042 2043 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2044 } 2045 2046 int 2047 sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2048 register_t *retval) 2049 { 2050 /* { 2051 syscallarg(int) fd; 2052 syscallarg(const char *) path; 2053 } */ 2054 2055 return ENOSYS; 2056 } 2057 2058 int 2059 do_sys_unlink(const char *arg, enum uio_seg seg) 2060 { 2061 struct vnode *vp; 2062 int error; 2063 struct pathbuf *pb; 2064 struct nameidata nd; 2065 const char *pathstring; 2066 2067 error = pathbuf_maybe_copyin(arg, seg, &pb); 2068 if (error) { 2069 return error; 2070 } 2071 pathstring = pathbuf_stringcopy_get(pb); 2072 if (pathstring == NULL) { 2073 pathbuf_destroy(pb); 2074 return ENOMEM; 2075 } 2076 2077 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2078 if ((error = namei(&nd)) != 0) 2079 goto out; 2080 vp = nd.ni_vp; 2081 2082 /* 2083 * The root of a mounted filesystem cannot be deleted. 2084 */ 2085 if (vp->v_vflag & VV_ROOT) { 2086 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2087 if (nd.ni_dvp == vp) 2088 vrele(nd.ni_dvp); 2089 else 2090 vput(nd.ni_dvp); 2091 vput(vp); 2092 error = EBUSY; 2093 goto out; 2094 } 2095 2096 #if NVERIEXEC > 0 2097 /* Handle remove requests for veriexec entries. */ 2098 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2099 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2100 if (nd.ni_dvp == vp) 2101 vrele(nd.ni_dvp); 2102 else 2103 vput(nd.ni_dvp); 2104 vput(vp); 2105 goto out; 2106 } 2107 #endif /* NVERIEXEC > 0 */ 2108 2109 #ifdef FILEASSOC 2110 (void)fileassoc_file_delete(vp); 2111 #endif /* FILEASSOC */ 2112 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2113 out: 2114 pathbuf_stringcopy_put(pb, pathstring); 2115 pathbuf_destroy(pb); 2116 return (error); 2117 } 2118 2119 /* 2120 * Reposition read/write file offset. 2121 */ 2122 int 2123 sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2124 { 2125 /* { 2126 syscallarg(int) fd; 2127 syscallarg(int) pad; 2128 syscallarg(off_t) offset; 2129 syscallarg(int) whence; 2130 } */ 2131 kauth_cred_t cred = l->l_cred; 2132 file_t *fp; 2133 struct vnode *vp; 2134 struct vattr vattr; 2135 off_t newoff; 2136 int error, fd; 2137 2138 fd = SCARG(uap, fd); 2139 2140 if ((fp = fd_getfile(fd)) == NULL) 2141 return (EBADF); 2142 2143 vp = fp->f_data; 2144 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2145 error = ESPIPE; 2146 goto out; 2147 } 2148 2149 switch (SCARG(uap, whence)) { 2150 case SEEK_CUR: 2151 newoff = fp->f_offset + SCARG(uap, offset); 2152 break; 2153 case SEEK_END: 2154 vn_lock(vp, LK_SHARED | LK_RETRY); 2155 error = VOP_GETATTR(vp, &vattr, cred); 2156 VOP_UNLOCK(vp); 2157 if (error) { 2158 goto out; 2159 } 2160 newoff = SCARG(uap, offset) + vattr.va_size; 2161 break; 2162 case SEEK_SET: 2163 newoff = SCARG(uap, offset); 2164 break; 2165 default: 2166 error = EINVAL; 2167 goto out; 2168 } 2169 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2170 *(off_t *)retval = fp->f_offset = newoff; 2171 } 2172 out: 2173 fd_putfile(fd); 2174 return (error); 2175 } 2176 2177 /* 2178 * Positional read system call. 2179 */ 2180 int 2181 sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2182 { 2183 /* { 2184 syscallarg(int) fd; 2185 syscallarg(void *) buf; 2186 syscallarg(size_t) nbyte; 2187 syscallarg(off_t) offset; 2188 } */ 2189 file_t *fp; 2190 struct vnode *vp; 2191 off_t offset; 2192 int error, fd = SCARG(uap, fd); 2193 2194 if ((fp = fd_getfile(fd)) == NULL) 2195 return (EBADF); 2196 2197 if ((fp->f_flag & FREAD) == 0) { 2198 fd_putfile(fd); 2199 return (EBADF); 2200 } 2201 2202 vp = fp->f_data; 2203 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2204 error = ESPIPE; 2205 goto out; 2206 } 2207 2208 offset = SCARG(uap, offset); 2209 2210 /* 2211 * XXX This works because no file systems actually 2212 * XXX take any action on the seek operation. 2213 */ 2214 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2215 goto out; 2216 2217 /* dofileread() will unuse the descriptor for us */ 2218 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2219 &offset, 0, retval)); 2220 2221 out: 2222 fd_putfile(fd); 2223 return (error); 2224 } 2225 2226 /* 2227 * Positional scatter read system call. 2228 */ 2229 int 2230 sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2231 { 2232 /* { 2233 syscallarg(int) fd; 2234 syscallarg(const struct iovec *) iovp; 2235 syscallarg(int) iovcnt; 2236 syscallarg(off_t) offset; 2237 } */ 2238 off_t offset = SCARG(uap, offset); 2239 2240 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2241 SCARG(uap, iovcnt), &offset, 0, retval); 2242 } 2243 2244 /* 2245 * Positional write system call. 2246 */ 2247 int 2248 sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2249 { 2250 /* { 2251 syscallarg(int) fd; 2252 syscallarg(const void *) buf; 2253 syscallarg(size_t) nbyte; 2254 syscallarg(off_t) offset; 2255 } */ 2256 file_t *fp; 2257 struct vnode *vp; 2258 off_t offset; 2259 int error, fd = SCARG(uap, fd); 2260 2261 if ((fp = fd_getfile(fd)) == NULL) 2262 return (EBADF); 2263 2264 if ((fp->f_flag & FWRITE) == 0) { 2265 fd_putfile(fd); 2266 return (EBADF); 2267 } 2268 2269 vp = fp->f_data; 2270 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2271 error = ESPIPE; 2272 goto out; 2273 } 2274 2275 offset = SCARG(uap, offset); 2276 2277 /* 2278 * XXX This works because no file systems actually 2279 * XXX take any action on the seek operation. 2280 */ 2281 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2282 goto out; 2283 2284 /* dofilewrite() will unuse the descriptor for us */ 2285 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2286 &offset, 0, retval)); 2287 2288 out: 2289 fd_putfile(fd); 2290 return (error); 2291 } 2292 2293 /* 2294 * Positional gather write system call. 2295 */ 2296 int 2297 sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2298 { 2299 /* { 2300 syscallarg(int) fd; 2301 syscallarg(const struct iovec *) iovp; 2302 syscallarg(int) iovcnt; 2303 syscallarg(off_t) offset; 2304 } */ 2305 off_t offset = SCARG(uap, offset); 2306 2307 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2308 SCARG(uap, iovcnt), &offset, 0, retval); 2309 } 2310 2311 /* 2312 * Check access permissions. 2313 */ 2314 int 2315 sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2316 { 2317 /* { 2318 syscallarg(const char *) path; 2319 syscallarg(int) flags; 2320 } */ 2321 kauth_cred_t cred; 2322 struct vnode *vp; 2323 int error, flags; 2324 struct pathbuf *pb; 2325 struct nameidata nd; 2326 2327 CTASSERT(F_OK == 0); 2328 if ((SCARG(uap, flags) & ~(R_OK | W_OK | X_OK)) != 0) { 2329 /* nonsense flags */ 2330 return EINVAL; 2331 } 2332 2333 error = pathbuf_copyin(SCARG(uap, path), &pb); 2334 if (error) { 2335 return error; 2336 } 2337 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2338 2339 /* Override default credentials */ 2340 cred = kauth_cred_dup(l->l_cred); 2341 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2342 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2343 nd.ni_cnd.cn_cred = cred; 2344 2345 if ((error = namei(&nd)) != 0) { 2346 pathbuf_destroy(pb); 2347 goto out; 2348 } 2349 vp = nd.ni_vp; 2350 pathbuf_destroy(pb); 2351 2352 /* Flags == 0 means only check for existence. */ 2353 if (SCARG(uap, flags)) { 2354 flags = 0; 2355 if (SCARG(uap, flags) & R_OK) 2356 flags |= VREAD; 2357 if (SCARG(uap, flags) & W_OK) 2358 flags |= VWRITE; 2359 if (SCARG(uap, flags) & X_OK) 2360 flags |= VEXEC; 2361 2362 error = VOP_ACCESS(vp, flags, cred); 2363 if (!error && (flags & VWRITE)) 2364 error = vn_writechk(vp); 2365 } 2366 vput(vp); 2367 out: 2368 kauth_cred_free(cred); 2369 return (error); 2370 } 2371 2372 int 2373 sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 2374 register_t *retval) 2375 { 2376 /* { 2377 syscallarg(int) fd; 2378 syscallarg(const char *) path; 2379 syscallarg(int) amode; 2380 syscallarg(int) flag; 2381 } */ 2382 2383 return ENOSYS; 2384 } 2385 2386 /* 2387 * Common code for all sys_stat functions, including compat versions. 2388 */ 2389 int 2390 do_sys_stat(const char *userpath, unsigned int nd_flags, struct stat *sb) 2391 { 2392 int error; 2393 struct pathbuf *pb; 2394 struct nameidata nd; 2395 2396 error = pathbuf_copyin(userpath, &pb); 2397 if (error) { 2398 return error; 2399 } 2400 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, pb); 2401 error = namei(&nd); 2402 if (error != 0) { 2403 pathbuf_destroy(pb); 2404 return error; 2405 } 2406 error = vn_stat(nd.ni_vp, sb); 2407 vput(nd.ni_vp); 2408 pathbuf_destroy(pb); 2409 return error; 2410 } 2411 2412 /* 2413 * Get file status; this version follows links. 2414 */ 2415 /* ARGSUSED */ 2416 int 2417 sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2418 { 2419 /* { 2420 syscallarg(const char *) path; 2421 syscallarg(struct stat *) ub; 2422 } */ 2423 struct stat sb; 2424 int error; 2425 2426 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2427 if (error) 2428 return error; 2429 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2430 } 2431 2432 /* 2433 * Get file status; this version does not follow links. 2434 */ 2435 /* ARGSUSED */ 2436 int 2437 sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2438 { 2439 /* { 2440 syscallarg(const char *) path; 2441 syscallarg(struct stat *) ub; 2442 } */ 2443 struct stat sb; 2444 int error; 2445 2446 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2447 if (error) 2448 return error; 2449 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2450 } 2451 2452 int 2453 sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 2454 register_t *retval) 2455 { 2456 /* { 2457 syscallarg(int) fd; 2458 syscallarg(const char *) path; 2459 syscallarg(struct stat *) ub; 2460 syscallarg(int) flag; 2461 } */ 2462 2463 return ENOSYS; 2464 } 2465 /* 2466 * Get configurable pathname variables. 2467 */ 2468 /* ARGSUSED */ 2469 int 2470 sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2471 { 2472 /* { 2473 syscallarg(const char *) path; 2474 syscallarg(int) name; 2475 } */ 2476 int error; 2477 struct pathbuf *pb; 2478 struct nameidata nd; 2479 2480 error = pathbuf_copyin(SCARG(uap, path), &pb); 2481 if (error) { 2482 return error; 2483 } 2484 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2485 if ((error = namei(&nd)) != 0) { 2486 pathbuf_destroy(pb); 2487 return (error); 2488 } 2489 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2490 vput(nd.ni_vp); 2491 pathbuf_destroy(pb); 2492 return (error); 2493 } 2494 2495 /* 2496 * Return target name of a symbolic link. 2497 */ 2498 /* ARGSUSED */ 2499 int 2500 sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2501 { 2502 /* { 2503 syscallarg(const char *) path; 2504 syscallarg(char *) buf; 2505 syscallarg(size_t) count; 2506 } */ 2507 struct vnode *vp; 2508 struct iovec aiov; 2509 struct uio auio; 2510 int error; 2511 struct pathbuf *pb; 2512 struct nameidata nd; 2513 2514 error = pathbuf_copyin(SCARG(uap, path), &pb); 2515 if (error) { 2516 return error; 2517 } 2518 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2519 if ((error = namei(&nd)) != 0) { 2520 pathbuf_destroy(pb); 2521 return error; 2522 } 2523 vp = nd.ni_vp; 2524 pathbuf_destroy(pb); 2525 if (vp->v_type != VLNK) 2526 error = EINVAL; 2527 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2528 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2529 aiov.iov_base = SCARG(uap, buf); 2530 aiov.iov_len = SCARG(uap, count); 2531 auio.uio_iov = &aiov; 2532 auio.uio_iovcnt = 1; 2533 auio.uio_offset = 0; 2534 auio.uio_rw = UIO_READ; 2535 KASSERT(l == curlwp); 2536 auio.uio_vmspace = l->l_proc->p_vmspace; 2537 auio.uio_resid = SCARG(uap, count); 2538 error = VOP_READLINK(vp, &auio, l->l_cred); 2539 } 2540 vput(vp); 2541 *retval = SCARG(uap, count) - auio.uio_resid; 2542 return (error); 2543 } 2544 2545 int 2546 sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 2547 register_t *retval) 2548 { 2549 /* { 2550 syscallarg(int) fd; 2551 syscallarg(const char *) path; 2552 syscallarg(char *) buf; 2553 syscallarg(size_t) count; 2554 } */ 2555 2556 return ENOSYS; 2557 } 2558 2559 /* 2560 * Change flags of a file given a path name. 2561 */ 2562 /* ARGSUSED */ 2563 int 2564 sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2565 { 2566 /* { 2567 syscallarg(const char *) path; 2568 syscallarg(u_long) flags; 2569 } */ 2570 struct vnode *vp; 2571 int error; 2572 2573 error = namei_simple_user(SCARG(uap, path), 2574 NSM_FOLLOW_TRYEMULROOT, &vp); 2575 if (error != 0) 2576 return (error); 2577 error = change_flags(vp, SCARG(uap, flags), l); 2578 vput(vp); 2579 return (error); 2580 } 2581 2582 /* 2583 * Change flags of a file given a file descriptor. 2584 */ 2585 /* ARGSUSED */ 2586 int 2587 sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 2588 { 2589 /* { 2590 syscallarg(int) fd; 2591 syscallarg(u_long) flags; 2592 } */ 2593 struct vnode *vp; 2594 file_t *fp; 2595 int error; 2596 2597 /* fd_getvnode() will use the descriptor for us */ 2598 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2599 return (error); 2600 vp = fp->f_data; 2601 error = change_flags(vp, SCARG(uap, flags), l); 2602 VOP_UNLOCK(vp); 2603 fd_putfile(SCARG(uap, fd)); 2604 return (error); 2605 } 2606 2607 /* 2608 * Change flags of a file given a path name; this version does 2609 * not follow links. 2610 */ 2611 int 2612 sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 2613 { 2614 /* { 2615 syscallarg(const char *) path; 2616 syscallarg(u_long) flags; 2617 } */ 2618 struct vnode *vp; 2619 int error; 2620 2621 error = namei_simple_user(SCARG(uap, path), 2622 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2623 if (error != 0) 2624 return (error); 2625 error = change_flags(vp, SCARG(uap, flags), l); 2626 vput(vp); 2627 return (error); 2628 } 2629 2630 /* 2631 * Common routine to change flags of a file. 2632 */ 2633 int 2634 change_flags(struct vnode *vp, u_long flags, struct lwp *l) 2635 { 2636 struct vattr vattr; 2637 int error; 2638 2639 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2640 /* 2641 * Non-superusers cannot change the flags on devices, even if they 2642 * own them. 2643 */ 2644 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2645 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2646 goto out; 2647 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 2648 error = EINVAL; 2649 goto out; 2650 } 2651 } 2652 vattr_null(&vattr); 2653 vattr.va_flags = flags; 2654 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2655 out: 2656 return (error); 2657 } 2658 2659 /* 2660 * Change mode of a file given path name; this version follows links. 2661 */ 2662 /* ARGSUSED */ 2663 int 2664 sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 2665 { 2666 /* { 2667 syscallarg(const char *) path; 2668 syscallarg(int) mode; 2669 } */ 2670 int error; 2671 struct vnode *vp; 2672 2673 error = namei_simple_user(SCARG(uap, path), 2674 NSM_FOLLOW_TRYEMULROOT, &vp); 2675 if (error != 0) 2676 return (error); 2677 2678 error = change_mode(vp, SCARG(uap, mode), l); 2679 2680 vrele(vp); 2681 return (error); 2682 } 2683 2684 /* 2685 * Change mode of a file given a file descriptor. 2686 */ 2687 /* ARGSUSED */ 2688 int 2689 sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 2690 { 2691 /* { 2692 syscallarg(int) fd; 2693 syscallarg(int) mode; 2694 } */ 2695 file_t *fp; 2696 int error; 2697 2698 /* fd_getvnode() will use the descriptor for us */ 2699 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2700 return (error); 2701 error = change_mode(fp->f_data, SCARG(uap, mode), l); 2702 fd_putfile(SCARG(uap, fd)); 2703 return (error); 2704 } 2705 2706 int 2707 sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 2708 register_t *retval) 2709 { 2710 /* { 2711 syscallarg(int) fd; 2712 syscallarg(const char *) path; 2713 syscallarg(int) mode; 2714 syscallarg(int) flag; 2715 } */ 2716 2717 return ENOSYS; 2718 } 2719 2720 /* 2721 * Change mode of a file given path name; this version does not follow links. 2722 */ 2723 /* ARGSUSED */ 2724 int 2725 sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 2726 { 2727 /* { 2728 syscallarg(const char *) path; 2729 syscallarg(int) mode; 2730 } */ 2731 int error; 2732 struct vnode *vp; 2733 2734 error = namei_simple_user(SCARG(uap, path), 2735 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2736 if (error != 0) 2737 return (error); 2738 2739 error = change_mode(vp, SCARG(uap, mode), l); 2740 2741 vrele(vp); 2742 return (error); 2743 } 2744 2745 /* 2746 * Common routine to set mode given a vnode. 2747 */ 2748 static int 2749 change_mode(struct vnode *vp, int mode, struct lwp *l) 2750 { 2751 struct vattr vattr; 2752 int error; 2753 2754 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2755 vattr_null(&vattr); 2756 vattr.va_mode = mode & ALLPERMS; 2757 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2758 VOP_UNLOCK(vp); 2759 return (error); 2760 } 2761 2762 /* 2763 * Set ownership given a path name; this version follows links. 2764 */ 2765 /* ARGSUSED */ 2766 int 2767 sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 2768 { 2769 /* { 2770 syscallarg(const char *) path; 2771 syscallarg(uid_t) uid; 2772 syscallarg(gid_t) gid; 2773 } */ 2774 int error; 2775 struct vnode *vp; 2776 2777 error = namei_simple_user(SCARG(uap, path), 2778 NSM_FOLLOW_TRYEMULROOT, &vp); 2779 if (error != 0) 2780 return (error); 2781 2782 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2783 2784 vrele(vp); 2785 return (error); 2786 } 2787 2788 /* 2789 * Set ownership given a path name; this version follows links. 2790 * Provides POSIX semantics. 2791 */ 2792 /* ARGSUSED */ 2793 int 2794 sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 2795 { 2796 /* { 2797 syscallarg(const char *) path; 2798 syscallarg(uid_t) uid; 2799 syscallarg(gid_t) gid; 2800 } */ 2801 int error; 2802 struct vnode *vp; 2803 2804 error = namei_simple_user(SCARG(uap, path), 2805 NSM_FOLLOW_TRYEMULROOT, &vp); 2806 if (error != 0) 2807 return (error); 2808 2809 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2810 2811 vrele(vp); 2812 return (error); 2813 } 2814 2815 /* 2816 * Set ownership given a file descriptor. 2817 */ 2818 /* ARGSUSED */ 2819 int 2820 sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 2821 { 2822 /* { 2823 syscallarg(int) fd; 2824 syscallarg(uid_t) uid; 2825 syscallarg(gid_t) gid; 2826 } */ 2827 int error; 2828 file_t *fp; 2829 2830 /* fd_getvnode() will use the descriptor for us */ 2831 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2832 return (error); 2833 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2834 l, 0); 2835 fd_putfile(SCARG(uap, fd)); 2836 return (error); 2837 } 2838 2839 int 2840 sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 2841 register_t *retval) 2842 { 2843 /* { 2844 syscallarg(int) fd; 2845 syscallarg(const char *) path; 2846 syscallarg(uid_t) uid; 2847 syscallarg(gid_t) gid; 2848 syscallarg(int) flag; 2849 } */ 2850 2851 return ENOSYS; 2852 } 2853 2854 /* 2855 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 2856 */ 2857 /* ARGSUSED */ 2858 int 2859 sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 2860 { 2861 /* { 2862 syscallarg(int) fd; 2863 syscallarg(uid_t) uid; 2864 syscallarg(gid_t) gid; 2865 } */ 2866 int error; 2867 file_t *fp; 2868 2869 /* fd_getvnode() will use the descriptor for us */ 2870 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 2871 return (error); 2872 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 2873 l, 1); 2874 fd_putfile(SCARG(uap, fd)); 2875 return (error); 2876 } 2877 2878 /* 2879 * Set ownership given a path name; this version does not follow links. 2880 */ 2881 /* ARGSUSED */ 2882 int 2883 sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 2884 { 2885 /* { 2886 syscallarg(const char *) path; 2887 syscallarg(uid_t) uid; 2888 syscallarg(gid_t) gid; 2889 } */ 2890 int error; 2891 struct vnode *vp; 2892 2893 error = namei_simple_user(SCARG(uap, path), 2894 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2895 if (error != 0) 2896 return (error); 2897 2898 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 2899 2900 vrele(vp); 2901 return (error); 2902 } 2903 2904 /* 2905 * Set ownership given a path name; this version does not follow links. 2906 * Provides POSIX/XPG semantics. 2907 */ 2908 /* ARGSUSED */ 2909 int 2910 sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 2911 { 2912 /* { 2913 syscallarg(const char *) path; 2914 syscallarg(uid_t) uid; 2915 syscallarg(gid_t) gid; 2916 } */ 2917 int error; 2918 struct vnode *vp; 2919 2920 error = namei_simple_user(SCARG(uap, path), 2921 NSM_NOFOLLOW_TRYEMULROOT, &vp); 2922 if (error != 0) 2923 return (error); 2924 2925 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 2926 2927 vrele(vp); 2928 return (error); 2929 } 2930 2931 /* 2932 * Common routine to set ownership given a vnode. 2933 */ 2934 static int 2935 change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 2936 int posix_semantics) 2937 { 2938 struct vattr vattr; 2939 mode_t newmode; 2940 int error; 2941 2942 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2943 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 2944 goto out; 2945 2946 #define CHANGED(x) ((int)(x) != -1) 2947 newmode = vattr.va_mode; 2948 if (posix_semantics) { 2949 /* 2950 * POSIX/XPG semantics: if the caller is not the super-user, 2951 * clear set-user-id and set-group-id bits. Both POSIX and 2952 * the XPG consider the behaviour for calls by the super-user 2953 * implementation-defined; we leave the set-user-id and set- 2954 * group-id settings intact in that case. 2955 */ 2956 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 2957 NULL) != 0) 2958 newmode &= ~(S_ISUID | S_ISGID); 2959 } else { 2960 /* 2961 * NetBSD semantics: when changing owner and/or group, 2962 * clear the respective bit(s). 2963 */ 2964 if (CHANGED(uid)) 2965 newmode &= ~S_ISUID; 2966 if (CHANGED(gid)) 2967 newmode &= ~S_ISGID; 2968 } 2969 /* Update va_mode iff altered. */ 2970 if (vattr.va_mode == newmode) 2971 newmode = VNOVAL; 2972 2973 vattr_null(&vattr); 2974 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 2975 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 2976 vattr.va_mode = newmode; 2977 error = VOP_SETATTR(vp, &vattr, l->l_cred); 2978 #undef CHANGED 2979 2980 out: 2981 VOP_UNLOCK(vp); 2982 return (error); 2983 } 2984 2985 /* 2986 * Set the access and modification times given a path name; this 2987 * version follows links. 2988 */ 2989 /* ARGSUSED */ 2990 int 2991 sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 2992 register_t *retval) 2993 { 2994 /* { 2995 syscallarg(const char *) path; 2996 syscallarg(const struct timeval *) tptr; 2997 } */ 2998 2999 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3000 SCARG(uap, tptr), UIO_USERSPACE); 3001 } 3002 3003 /* 3004 * Set the access and modification times given a file descriptor. 3005 */ 3006 /* ARGSUSED */ 3007 int 3008 sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3009 register_t *retval) 3010 { 3011 /* { 3012 syscallarg(int) fd; 3013 syscallarg(const struct timeval *) tptr; 3014 } */ 3015 int error; 3016 file_t *fp; 3017 3018 /* fd_getvnode() will use the descriptor for us */ 3019 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3020 return (error); 3021 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3022 UIO_USERSPACE); 3023 fd_putfile(SCARG(uap, fd)); 3024 return (error); 3025 } 3026 3027 int 3028 sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3029 register_t *retval) 3030 { 3031 /* { 3032 syscallarg(int) fd; 3033 syscallarg(const struct timespec *) tptr; 3034 } */ 3035 int error; 3036 file_t *fp; 3037 3038 /* fd_getvnode() will use the descriptor for us */ 3039 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3040 return (error); 3041 error = do_sys_utimens(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3042 UIO_USERSPACE); 3043 fd_putfile(SCARG(uap, fd)); 3044 return (error); 3045 } 3046 3047 /* 3048 * Set the access and modification times given a path name; this 3049 * version does not follow links. 3050 */ 3051 int 3052 sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3053 register_t *retval) 3054 { 3055 /* { 3056 syscallarg(const char *) path; 3057 syscallarg(const struct timeval *) tptr; 3058 } */ 3059 3060 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3061 SCARG(uap, tptr), UIO_USERSPACE); 3062 } 3063 3064 int 3065 sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3066 register_t *retval) 3067 { 3068 /* { 3069 syscallarg(int) fd; 3070 syscallarg(const char *) path; 3071 syscallarg(const struct timespec *) tptr; 3072 syscallarg(int) flag; 3073 } */ 3074 int follow; 3075 const struct timespec *tptr; 3076 3077 /* 3078 * Specified fd is not yet implemented 3079 */ 3080 if (SCARG(uap, fd) != AT_FDCWD) 3081 return ENOSYS; 3082 3083 tptr = SCARG(uap, tptr); 3084 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3085 3086 return do_sys_utimens(l, NULL, SCARG(uap, path), follow, 3087 tptr, UIO_USERSPACE); 3088 } 3089 3090 /* 3091 * Common routine to set access and modification times given a vnode. 3092 */ 3093 int 3094 do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3095 const struct timespec *tptr, enum uio_seg seg) 3096 { 3097 struct vattr vattr; 3098 int error, dorele = 0; 3099 namei_simple_flags_t sflags; 3100 3101 bool vanull, setbirthtime; 3102 struct timespec ts[2]; 3103 3104 /* 3105 * I have checked all callers and they pass either FOLLOW, 3106 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3107 * is 0. More to the point, they don't pass anything else. 3108 * Let's keep it that way at least until the namei interfaces 3109 * are fully sanitized. 3110 */ 3111 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3112 sflags = (flag == FOLLOW) ? 3113 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3114 3115 if (tptr == NULL) { 3116 vanull = true; 3117 nanotime(&ts[0]); 3118 ts[1] = ts[0]; 3119 } else { 3120 vanull = false; 3121 if (seg != UIO_SYSSPACE) { 3122 error = copyin(tptr, ts, sizeof (ts)); 3123 if (error != 0) 3124 return error; 3125 } else { 3126 ts[0] = tptr[0]; 3127 ts[1] = tptr[1]; 3128 } 3129 } 3130 3131 if (ts[0].tv_nsec == UTIME_NOW) { 3132 nanotime(&ts[0]); 3133 if (ts[1].tv_nsec == UTIME_NOW) { 3134 vanull = true; 3135 ts[1] = ts[0]; 3136 } 3137 } else if (ts[1].tv_nsec == UTIME_NOW) 3138 nanotime(&ts[1]); 3139 3140 if (vp == NULL) { 3141 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3142 error = namei_simple_user(path, sflags, &vp); 3143 if (error != 0) 3144 return error; 3145 dorele = 1; 3146 } 3147 3148 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3149 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3150 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3151 vattr_null(&vattr); 3152 3153 if (ts[0].tv_nsec != UTIME_OMIT) 3154 vattr.va_atime = ts[0]; 3155 3156 if (ts[1].tv_nsec != UTIME_OMIT) { 3157 vattr.va_mtime = ts[1]; 3158 if (setbirthtime) 3159 vattr.va_birthtime = ts[1]; 3160 } 3161 3162 if (vanull) 3163 vattr.va_vaflags |= VA_UTIMES_NULL; 3164 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3165 VOP_UNLOCK(vp); 3166 3167 if (dorele != 0) 3168 vrele(vp); 3169 3170 return error; 3171 } 3172 3173 int 3174 do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3175 const struct timeval *tptr, enum uio_seg seg) 3176 { 3177 struct timespec ts[2]; 3178 struct timespec *tsptr = NULL; 3179 int error; 3180 3181 if (tptr != NULL) { 3182 struct timeval tv[2]; 3183 3184 if (seg != UIO_SYSSPACE) { 3185 error = copyin(tptr, tv, sizeof (tv)); 3186 if (error != 0) 3187 return error; 3188 tptr = tv; 3189 } 3190 3191 if ((tv[0].tv_usec == UTIME_NOW) || 3192 (tv[0].tv_usec == UTIME_OMIT)) 3193 ts[0].tv_nsec = tv[0].tv_usec; 3194 else 3195 TIMEVAL_TO_TIMESPEC(&tptr[0], &ts[0]); 3196 3197 if ((tv[1].tv_usec == UTIME_NOW) || 3198 (tv[1].tv_usec == UTIME_OMIT)) 3199 ts[1].tv_nsec = tv[1].tv_usec; 3200 else 3201 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3202 3203 tsptr = &ts[0]; 3204 } 3205 3206 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3207 } 3208 3209 /* 3210 * Truncate a file given its path name. 3211 */ 3212 /* ARGSUSED */ 3213 int 3214 sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3215 { 3216 /* { 3217 syscallarg(const char *) path; 3218 syscallarg(int) pad; 3219 syscallarg(off_t) length; 3220 } */ 3221 struct vnode *vp; 3222 struct vattr vattr; 3223 int error; 3224 3225 error = namei_simple_user(SCARG(uap, path), 3226 NSM_FOLLOW_TRYEMULROOT, &vp); 3227 if (error != 0) 3228 return (error); 3229 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3230 if (vp->v_type == VDIR) 3231 error = EISDIR; 3232 else if ((error = vn_writechk(vp)) == 0 && 3233 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3234 vattr_null(&vattr); 3235 vattr.va_size = SCARG(uap, length); 3236 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3237 } 3238 vput(vp); 3239 return (error); 3240 } 3241 3242 /* 3243 * Truncate a file given a file descriptor. 3244 */ 3245 /* ARGSUSED */ 3246 int 3247 sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3248 { 3249 /* { 3250 syscallarg(int) fd; 3251 syscallarg(int) pad; 3252 syscallarg(off_t) length; 3253 } */ 3254 struct vattr vattr; 3255 struct vnode *vp; 3256 file_t *fp; 3257 int error; 3258 3259 /* fd_getvnode() will use the descriptor for us */ 3260 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3261 return (error); 3262 if ((fp->f_flag & FWRITE) == 0) { 3263 error = EINVAL; 3264 goto out; 3265 } 3266 vp = fp->f_data; 3267 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3268 if (vp->v_type == VDIR) 3269 error = EISDIR; 3270 else if ((error = vn_writechk(vp)) == 0) { 3271 vattr_null(&vattr); 3272 vattr.va_size = SCARG(uap, length); 3273 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3274 } 3275 VOP_UNLOCK(vp); 3276 out: 3277 fd_putfile(SCARG(uap, fd)); 3278 return (error); 3279 } 3280 3281 /* 3282 * Sync an open file. 3283 */ 3284 /* ARGSUSED */ 3285 int 3286 sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3287 { 3288 /* { 3289 syscallarg(int) fd; 3290 } */ 3291 struct vnode *vp; 3292 file_t *fp; 3293 int error; 3294 3295 /* fd_getvnode() will use the descriptor for us */ 3296 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3297 return (error); 3298 vp = fp->f_data; 3299 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3300 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3301 VOP_UNLOCK(vp); 3302 fd_putfile(SCARG(uap, fd)); 3303 return (error); 3304 } 3305 3306 /* 3307 * Sync a range of file data. API modeled after that found in AIX. 3308 * 3309 * FDATASYNC indicates that we need only save enough metadata to be able 3310 * to re-read the written data. Note we duplicate AIX's requirement that 3311 * the file be open for writing. 3312 */ 3313 /* ARGSUSED */ 3314 int 3315 sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3316 { 3317 /* { 3318 syscallarg(int) fd; 3319 syscallarg(int) flags; 3320 syscallarg(off_t) start; 3321 syscallarg(off_t) length; 3322 } */ 3323 struct vnode *vp; 3324 file_t *fp; 3325 int flags, nflags; 3326 off_t s, e, len; 3327 int error; 3328 3329 /* fd_getvnode() will use the descriptor for us */ 3330 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3331 return (error); 3332 3333 if ((fp->f_flag & FWRITE) == 0) { 3334 error = EBADF; 3335 goto out; 3336 } 3337 3338 flags = SCARG(uap, flags); 3339 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3340 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3341 error = EINVAL; 3342 goto out; 3343 } 3344 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3345 if (flags & FDATASYNC) 3346 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3347 else 3348 nflags = FSYNC_WAIT; 3349 if (flags & FDISKSYNC) 3350 nflags |= FSYNC_CACHE; 3351 3352 len = SCARG(uap, length); 3353 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 3354 if (len) { 3355 s = SCARG(uap, start); 3356 e = s + len; 3357 if (e < s) { 3358 error = EINVAL; 3359 goto out; 3360 } 3361 } else { 3362 e = 0; 3363 s = 0; 3364 } 3365 3366 vp = fp->f_data; 3367 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3368 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3369 VOP_UNLOCK(vp); 3370 out: 3371 fd_putfile(SCARG(uap, fd)); 3372 return (error); 3373 } 3374 3375 /* 3376 * Sync the data of an open file. 3377 */ 3378 /* ARGSUSED */ 3379 int 3380 sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3381 { 3382 /* { 3383 syscallarg(int) fd; 3384 } */ 3385 struct vnode *vp; 3386 file_t *fp; 3387 int error; 3388 3389 /* fd_getvnode() will use the descriptor for us */ 3390 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3391 return (error); 3392 if ((fp->f_flag & FWRITE) == 0) { 3393 fd_putfile(SCARG(uap, fd)); 3394 return (EBADF); 3395 } 3396 vp = fp->f_data; 3397 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3398 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3399 VOP_UNLOCK(vp); 3400 fd_putfile(SCARG(uap, fd)); 3401 return (error); 3402 } 3403 3404 /* 3405 * Rename files, (standard) BSD semantics frontend. 3406 */ 3407 /* ARGSUSED */ 3408 int 3409 sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3410 { 3411 /* { 3412 syscallarg(const char *) from; 3413 syscallarg(const char *) to; 3414 } */ 3415 3416 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3417 } 3418 3419 int 3420 sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 3421 register_t *retval) 3422 { 3423 /* { 3424 syscallarg(int) fromfd; 3425 syscallarg(const char *) from; 3426 syscallarg(int) tofd; 3427 syscallarg(const char *) to; 3428 } */ 3429 3430 return ENOSYS; 3431 } 3432 3433 /* 3434 * Rename files, POSIX semantics frontend. 3435 */ 3436 /* ARGSUSED */ 3437 int 3438 sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3439 { 3440 /* { 3441 syscallarg(const char *) from; 3442 syscallarg(const char *) to; 3443 } */ 3444 3445 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3446 } 3447 3448 /* 3449 * Rename files. Source and destination must either both be directories, 3450 * or both not be directories. If target is a directory, it must be empty. 3451 * If `from' and `to' refer to the same object, the value of the `retain' 3452 * argument is used to determine whether `from' will be 3453 * 3454 * (retain == 0) deleted unless `from' and `to' refer to the same 3455 * object in the file system's name space (BSD). 3456 * (retain == 1) always retained (POSIX). 3457 */ 3458 int 3459 do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain) 3460 { 3461 struct vnode *tvp, *fvp, *tdvp; 3462 struct pathbuf *frompb, *topb; 3463 struct nameidata fromnd, tond; 3464 struct mount *fs; 3465 int error; 3466 3467 error = pathbuf_maybe_copyin(from, seg, &frompb); 3468 if (error) { 3469 return error; 3470 } 3471 error = pathbuf_maybe_copyin(to, seg, &topb); 3472 if (error) { 3473 pathbuf_destroy(frompb); 3474 return error; 3475 } 3476 3477 NDINIT(&fromnd, DELETE, LOCKPARENT | TRYEMULROOT | INRENAME, 3478 frompb); 3479 if ((error = namei(&fromnd)) != 0) { 3480 pathbuf_destroy(frompb); 3481 pathbuf_destroy(topb); 3482 return (error); 3483 } 3484 if (fromnd.ni_dvp != fromnd.ni_vp) 3485 VOP_UNLOCK(fromnd.ni_dvp); 3486 fvp = fromnd.ni_vp; 3487 3488 fs = fvp->v_mount; 3489 error = VFS_RENAMELOCK_ENTER(fs); 3490 if (error) { 3491 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3492 vrele(fromnd.ni_dvp); 3493 vrele(fvp); 3494 goto out1; 3495 } 3496 3497 /* 3498 * close, partially, yet another race - ideally we should only 3499 * go as far as getting fromnd.ni_dvp before getting the per-fs 3500 * lock, and then continue to get fromnd.ni_vp, but we can't do 3501 * that with namei as it stands. 3502 * 3503 * This still won't prevent rmdir from nuking fromnd.ni_vp 3504 * under us. The real fix is to get the locks in the right 3505 * order and do the lookups in the right places, but that's a 3506 * major rototill. 3507 * 3508 * Note: this logic (as well as this whole function) is cloned 3509 * in nfs_serv.c. Proceed accordingly. 3510 */ 3511 vrele(fvp); 3512 if ((fromnd.ni_cnd.cn_namelen == 1 && 3513 fromnd.ni_cnd.cn_nameptr[0] == '.') || 3514 (fromnd.ni_cnd.cn_namelen == 2 && 3515 fromnd.ni_cnd.cn_nameptr[0] == '.' && 3516 fromnd.ni_cnd.cn_nameptr[1] == '.')) { 3517 error = EINVAL; 3518 VFS_RENAMELOCK_EXIT(fs); 3519 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3520 vrele(fromnd.ni_dvp); 3521 goto out1; 3522 } 3523 vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY); 3524 error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd, 0); 3525 if (error) { 3526 VOP_UNLOCK(fromnd.ni_dvp); 3527 VFS_RENAMELOCK_EXIT(fs); 3528 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3529 vrele(fromnd.ni_dvp); 3530 goto out1; 3531 } 3532 VOP_UNLOCK(fromnd.ni_vp); 3533 if (fromnd.ni_dvp != fromnd.ni_vp) 3534 VOP_UNLOCK(fromnd.ni_dvp); 3535 fvp = fromnd.ni_vp; 3536 3537 NDINIT(&tond, RENAME, 3538 LOCKPARENT | LOCKLEAF | NOCACHE | TRYEMULROOT 3539 | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0), 3540 topb); 3541 if ((error = namei(&tond)) != 0) { 3542 VFS_RENAMELOCK_EXIT(fs); 3543 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3544 vrele(fromnd.ni_dvp); 3545 vrele(fvp); 3546 goto out1; 3547 } 3548 tdvp = tond.ni_dvp; 3549 tvp = tond.ni_vp; 3550 3551 if (tvp != NULL) { 3552 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3553 error = ENOTDIR; 3554 goto out; 3555 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3556 error = EISDIR; 3557 goto out; 3558 } 3559 } 3560 3561 if (fvp == tdvp) 3562 error = EINVAL; 3563 3564 /* 3565 * Source and destination refer to the same object. 3566 */ 3567 if (fvp == tvp) { 3568 if (retain) 3569 error = -1; 3570 else if (fromnd.ni_dvp == tdvp && 3571 fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && 3572 !memcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, 3573 fromnd.ni_cnd.cn_namelen)) 3574 error = -1; 3575 } 3576 /* 3577 * Prevent cross-mount operation. 3578 */ 3579 if (error == 0) { 3580 if (tond.ni_dvp->v_mount != fromnd.ni_dvp->v_mount) { 3581 error = EXDEV; 3582 } 3583 } 3584 #if NVERIEXEC > 0 3585 if (!error) { 3586 char *f1, *f2; 3587 size_t f1_len; 3588 size_t f2_len; 3589 3590 f1_len = fromnd.ni_cnd.cn_namelen + 1; 3591 f1 = kmem_alloc(f1_len, KM_SLEEP); 3592 strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len); 3593 3594 f2_len = tond.ni_cnd.cn_namelen + 1; 3595 f2 = kmem_alloc(f2_len, KM_SLEEP); 3596 strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len); 3597 3598 error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2); 3599 3600 kmem_free(f1, f1_len); 3601 kmem_free(f2, f2_len); 3602 } 3603 #endif /* NVERIEXEC > 0 */ 3604 3605 out: 3606 if (!error) { 3607 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3608 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3609 VFS_RENAMELOCK_EXIT(fs); 3610 } else { 3611 VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); 3612 if (tdvp == tvp) 3613 vrele(tdvp); 3614 else 3615 vput(tdvp); 3616 if (tvp) 3617 vput(tvp); 3618 VFS_RENAMELOCK_EXIT(fs); 3619 VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); 3620 vrele(fromnd.ni_dvp); 3621 vrele(fvp); 3622 } 3623 out1: 3624 pathbuf_destroy(frompb); 3625 pathbuf_destroy(topb); 3626 return (error == -1 ? 0 : error); 3627 } 3628 3629 /* 3630 * Make a directory file. 3631 */ 3632 /* ARGSUSED */ 3633 int 3634 sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 3635 { 3636 /* { 3637 syscallarg(const char *) path; 3638 syscallarg(int) mode; 3639 } */ 3640 3641 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 3642 } 3643 3644 int 3645 sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 3646 register_t *retval) 3647 { 3648 /* { 3649 syscallarg(int) fd; 3650 syscallarg(const char *) path; 3651 syscallarg(int) mode; 3652 } */ 3653 3654 return ENOSYS; 3655 } 3656 3657 3658 int 3659 do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 3660 { 3661 struct proc *p = curlwp->l_proc; 3662 struct vnode *vp; 3663 struct vattr vattr; 3664 int error; 3665 struct pathbuf *pb; 3666 struct nameidata nd; 3667 3668 /* XXX bollocks, should pass in a pathbuf */ 3669 error = pathbuf_maybe_copyin(path, seg, &pb); 3670 if (error) { 3671 return error; 3672 } 3673 3674 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 3675 if ((error = namei(&nd)) != 0) { 3676 pathbuf_destroy(pb); 3677 return (error); 3678 } 3679 vp = nd.ni_vp; 3680 if (vp != NULL) { 3681 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3682 if (nd.ni_dvp == vp) 3683 vrele(nd.ni_dvp); 3684 else 3685 vput(nd.ni_dvp); 3686 vrele(vp); 3687 pathbuf_destroy(pb); 3688 return (EEXIST); 3689 } 3690 vattr_null(&vattr); 3691 vattr.va_type = VDIR; 3692 /* We will read cwdi->cwdi_cmask unlocked. */ 3693 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 3694 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3695 if (!error) 3696 vput(nd.ni_vp); 3697 pathbuf_destroy(pb); 3698 return (error); 3699 } 3700 3701 /* 3702 * Remove a directory file. 3703 */ 3704 /* ARGSUSED */ 3705 int 3706 sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 3707 { 3708 /* { 3709 syscallarg(const char *) path; 3710 } */ 3711 struct vnode *vp; 3712 int error; 3713 struct pathbuf *pb; 3714 struct nameidata nd; 3715 3716 error = pathbuf_copyin(SCARG(uap, path), &pb); 3717 if (error) { 3718 return error; 3719 } 3720 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 3721 if ((error = namei(&nd)) != 0) { 3722 pathbuf_destroy(pb); 3723 return error; 3724 } 3725 vp = nd.ni_vp; 3726 if (vp->v_type != VDIR) { 3727 error = ENOTDIR; 3728 goto out; 3729 } 3730 /* 3731 * No rmdir "." please. 3732 */ 3733 if (nd.ni_dvp == vp) { 3734 error = EINVAL; 3735 goto out; 3736 } 3737 /* 3738 * The root of a mounted filesystem cannot be deleted. 3739 */ 3740 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 3741 error = EBUSY; 3742 goto out; 3743 } 3744 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3745 pathbuf_destroy(pb); 3746 return (error); 3747 3748 out: 3749 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 3750 if (nd.ni_dvp == vp) 3751 vrele(nd.ni_dvp); 3752 else 3753 vput(nd.ni_dvp); 3754 vput(vp); 3755 pathbuf_destroy(pb); 3756 return (error); 3757 } 3758 3759 /* 3760 * Read a block of directory entries in a file system independent format. 3761 */ 3762 int 3763 sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 3764 { 3765 /* { 3766 syscallarg(int) fd; 3767 syscallarg(char *) buf; 3768 syscallarg(size_t) count; 3769 } */ 3770 file_t *fp; 3771 int error, done; 3772 3773 /* fd_getvnode() will use the descriptor for us */ 3774 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3775 return (error); 3776 if ((fp->f_flag & FREAD) == 0) { 3777 error = EBADF; 3778 goto out; 3779 } 3780 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 3781 SCARG(uap, count), &done, l, 0, 0); 3782 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 3783 *retval = done; 3784 out: 3785 fd_putfile(SCARG(uap, fd)); 3786 return (error); 3787 } 3788 3789 /* 3790 * Set the mode mask for creation of filesystem nodes. 3791 */ 3792 int 3793 sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 3794 { 3795 /* { 3796 syscallarg(mode_t) newmask; 3797 } */ 3798 struct proc *p = l->l_proc; 3799 struct cwdinfo *cwdi; 3800 3801 /* 3802 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 3803 * important is that we serialize changes to the mask. The 3804 * rw_exit() will issue a write memory barrier on our behalf, 3805 * and force the changes out to other CPUs (as it must use an 3806 * atomic operation, draining the local CPU's store buffers). 3807 */ 3808 cwdi = p->p_cwdi; 3809 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 3810 *retval = cwdi->cwdi_cmask; 3811 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 3812 rw_exit(&cwdi->cwdi_lock); 3813 3814 return (0); 3815 } 3816 3817 int 3818 dorevoke(struct vnode *vp, kauth_cred_t cred) 3819 { 3820 struct vattr vattr; 3821 int error; 3822 3823 vn_lock(vp, LK_SHARED | LK_RETRY); 3824 error = VOP_GETATTR(vp, &vattr, cred); 3825 VOP_UNLOCK(vp); 3826 if (error != 0) 3827 return error; 3828 if (kauth_cred_geteuid(cred) == vattr.va_uid || 3829 (error = kauth_authorize_generic(cred, 3830 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 3831 VOP_REVOKE(vp, REVOKEALL); 3832 return (error); 3833 } 3834 3835 /* 3836 * Void all references to file by ripping underlying filesystem 3837 * away from vnode. 3838 */ 3839 /* ARGSUSED */ 3840 int 3841 sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 3842 { 3843 /* { 3844 syscallarg(const char *) path; 3845 } */ 3846 struct vnode *vp; 3847 int error; 3848 3849 error = namei_simple_user(SCARG(uap, path), 3850 NSM_FOLLOW_TRYEMULROOT, &vp); 3851 if (error != 0) 3852 return (error); 3853 error = dorevoke(vp, l->l_cred); 3854 vrele(vp); 3855 return (error); 3856 } 3857