1 /* $OpenBSD: ufs_vnops.c,v 1.164 2024/10/18 05:52:33 miod Exp $ */ 2 /* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)ufs_vnops.c 8.14 (Berkeley) 10/26/94 38 */ 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/namei.h> 43 #include <sys/resourcevar.h> 44 #include <sys/kernel.h> 45 #include <sys/fcntl.h> 46 #include <sys/file.h> 47 #include <sys/stat.h> 48 #include <sys/buf.h> 49 #include <sys/proc.h> 50 #include <sys/mount.h> 51 #include <sys/vnode.h> 52 #include <sys/malloc.h> 53 #include <sys/pool.h> 54 #include <sys/dirent.h> 55 #include <sys/lockf.h> 56 #include <sys/event.h> 57 #include <sys/specdev.h> 58 #include <sys/unistd.h> 59 60 #include <miscfs/fifofs/fifo.h> 61 62 #include <ufs/ufs/quota.h> 63 #include <ufs/ufs/inode.h> 64 #include <ufs/ufs/dir.h> 65 #include <ufs/ufs/ufsmount.h> 66 #include <ufs/ufs/ufs_extern.h> 67 #ifdef UFS_DIRHASH 68 #include <ufs/ufs/dirhash.h> 69 #endif 70 #include <ufs/ext2fs/ext2fs_extern.h> 71 72 #include <uvm/uvm_extern.h> 73 74 int ufs_chmod(struct vnode *, int, struct ucred *); 75 int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *); 76 int filt_ufsread(struct knote *, long); 77 int filt_ufswrite(struct knote *, long); 78 int filt_ufsvnode(struct knote *, long); 79 void filt_ufsdetach(struct knote *); 80 81 /* 82 * A virgin directory (no blushing please). 83 */ 84 static const struct dirtemplate mastertemplate = { 85 0, 12, DT_DIR, 1, ".", 86 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." 87 }; 88 89 /* 90 * Update the times in the inode 91 */ 92 void 93 ufs_itimes(struct vnode *vp) 94 { 95 struct inode *ip; 96 struct timespec ts; 97 98 ip = VTOI(vp); 99 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) 100 return; 101 102 if (vp->v_mount->mnt_flag & MNT_RDONLY) 103 goto out; 104 105 #ifdef EXT2FS 106 if (IS_EXT2_VNODE(ip->i_vnode)) { 107 EXT2FS_ITIMES(ip); 108 goto out; 109 } 110 #endif 111 112 if ((vp->v_type == VBLK || vp->v_type == VCHR)) 113 ip->i_flag |= IN_LAZYMOD; 114 else 115 ip->i_flag |= IN_MODIFIED; 116 117 getnanotime(&ts); 118 if (ip->i_flag & IN_ACCESS) { 119 DIP_ASSIGN(ip, atime, ts.tv_sec); 120 DIP_ASSIGN(ip, atimensec, ts.tv_nsec); 121 } 122 if (ip->i_flag & IN_UPDATE) { 123 DIP_ASSIGN(ip, mtime, ts.tv_sec); 124 DIP_ASSIGN(ip, mtimensec, ts.tv_nsec); 125 } 126 if (ip->i_flag & IN_CHANGE) { 127 DIP_ASSIGN(ip, ctime, ts.tv_sec); 128 DIP_ASSIGN(ip, ctimensec, ts.tv_nsec); 129 ip->i_modrev++; 130 } 131 132 out: 133 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); 134 } 135 136 137 /* 138 * Create a regular file 139 */ 140 int 141 ufs_create(void *v) 142 { 143 struct vop_create_args *ap = v; 144 int error; 145 146 error = 147 ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 148 ap->a_dvp, ap->a_vpp, ap->a_cnp); 149 if (error == 0) 150 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 151 return (error); 152 } 153 154 /* 155 * Mknod vnode call 156 */ 157 int 158 ufs_mknod(void *v) 159 { 160 struct vop_mknod_args *ap = v; 161 struct vattr *vap = ap->a_vap; 162 struct vnode **vpp = ap->a_vpp; 163 struct inode *ip; 164 int error; 165 166 if ((error = 167 ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 168 ap->a_dvp, vpp, ap->a_cnp)) != 0) 169 return (error); 170 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 171 ip = VTOI(*vpp); 172 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 173 if (vap->va_rdev != VNOVAL) { 174 /* 175 * Want to be able to use this to make badblock 176 * inodes, so don't truncate the dev number. 177 */ 178 DIP_ASSIGN(ip, rdev, vap->va_rdev); 179 } 180 /* 181 * Remove inode so that it will be reloaded by VFS_VGET and 182 * checked to see if it is an alias of an existing entry in 183 * the inode cache. 184 */ 185 vput(*vpp); 186 (*vpp)->v_type = VNON; 187 vgone(*vpp); 188 *vpp = NULL; 189 return (0); 190 } 191 192 /* 193 * Open called. 194 * 195 * Nothing to do. 196 */ 197 int 198 ufs_open(void *v) 199 { 200 struct vop_open_args *ap = v; 201 struct inode *ip = VTOI(ap->a_vp); 202 203 /* 204 * Files marked append-only must be opened for appending. 205 */ 206 if ((DIP(ip, flags) & APPEND) && 207 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 208 return (EPERM); 209 210 if (ap->a_mode & O_TRUNC) 211 ip->i_flag |= IN_CHANGE | IN_UPDATE; 212 213 return (0); 214 } 215 216 /* 217 * Close called. 218 * 219 * Update the times on the inode. 220 */ 221 int 222 ufs_close(void *v) 223 { 224 struct vop_close_args *ap = v; 225 struct vnode *vp = ap->a_vp; 226 227 if (vp->v_usecount > 1) 228 ufs_itimes(vp); 229 return (0); 230 } 231 232 int 233 ufs_access(void *v) 234 { 235 struct vop_access_args *ap = v; 236 struct vnode *vp = ap->a_vp; 237 struct inode *ip = VTOI(vp); 238 mode_t mode = ap->a_mode; 239 240 /* 241 * Disallow write attempts on read-only file systems; 242 * unless the file is a socket, fifo, or a block or 243 * character device resident on the file system. 244 */ 245 if (mode & VWRITE) { 246 switch (vp->v_type) { 247 int error; 248 case VDIR: 249 case VLNK: 250 case VREG: 251 if (vp->v_mount->mnt_flag & MNT_RDONLY) 252 return (EROFS); 253 254 if ((error = getinoquota(ip)) != 0) 255 return (error); 256 break; 257 case VBAD: 258 case VBLK: 259 case VCHR: 260 case VSOCK: 261 case VFIFO: 262 case VNON: 263 break; 264 265 } 266 } 267 268 /* If immutable bit set, nobody gets to write it. */ 269 if ((mode & VWRITE) && (DIP(ip, flags) & IMMUTABLE)) 270 return (EPERM); 271 272 if (vnoperm(vp)) { 273 /* For VEXEC, at least one of the execute bits must be set. */ 274 if ((mode & VEXEC) && vp->v_type != VDIR && 275 (DIP(ip, mode) & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 276 return EACCES; 277 return 0; 278 } 279 280 return (vaccess(vp->v_type, DIP(ip, mode), DIP(ip, uid), DIP(ip, gid), 281 mode, ap->a_cred)); 282 } 283 284 int 285 ufs_getattr(void *v) 286 { 287 struct vop_getattr_args *ap = v; 288 struct vnode *vp = ap->a_vp; 289 struct inode *ip = VTOI(vp); 290 struct vattr *vap = ap->a_vap; 291 292 ufs_itimes(vp); 293 294 /* 295 * Copy from inode table 296 */ 297 vap->va_fsid = ip->i_dev; 298 vap->va_fileid = ip->i_number; 299 vap->va_mode = DIP(ip, mode) & ~IFMT; 300 vap->va_nlink = ip->i_effnlink; 301 vap->va_uid = DIP(ip, uid); 302 vap->va_gid = DIP(ip, gid); 303 vap->va_rdev = (dev_t) DIP(ip, rdev); 304 vap->va_size = DIP(ip, size); 305 vap->va_atime.tv_sec = DIP(ip, atime); 306 vap->va_atime.tv_nsec = DIP(ip, atimensec); 307 vap->va_mtime.tv_sec = DIP(ip, mtime); 308 vap->va_mtime.tv_nsec = DIP(ip, mtimensec); 309 vap->va_ctime.tv_sec = DIP(ip, ctime); 310 vap->va_ctime.tv_nsec = DIP(ip, ctimensec); 311 vap->va_flags = DIP(ip, flags); 312 vap->va_gen = DIP(ip, gen); 313 /* this doesn't belong here */ 314 if (vp->v_type == VBLK) 315 vap->va_blocksize = BLKDEV_IOSIZE; 316 else if (vp->v_type == VCHR) 317 vap->va_blocksize = MAXBSIZE; 318 else 319 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 320 vap->va_bytes = dbtob((u_quad_t) DIP(ip, blocks)); 321 vap->va_type = vp->v_type; 322 vap->va_filerev = ip->i_modrev; 323 return (0); 324 } 325 326 /* 327 * Set attribute vnode op. called from several syscalls 328 */ 329 int 330 ufs_setattr(void *v) 331 { 332 struct vop_setattr_args *ap = v; 333 struct vattr *vap = ap->a_vap; 334 struct vnode *vp = ap->a_vp; 335 struct inode *ip = VTOI(vp); 336 struct ucred *cred = ap->a_cred; 337 int error; 338 long hint = NOTE_ATTRIB; 339 u_quad_t oldsize; 340 341 /* 342 * Check for unsettable attributes. 343 */ 344 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 345 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 346 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 347 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 348 return (EINVAL); 349 } 350 if (vap->va_flags != VNOVAL) { 351 if (vp->v_mount->mnt_flag & MNT_RDONLY) 352 return (EROFS); 353 if (cred->cr_uid != DIP(ip, uid) && 354 !vnoperm(vp) && 355 (error = suser_ucred(cred))) 356 return (error); 357 if (cred->cr_uid == 0 || vnoperm(vp)) { 358 if ((DIP(ip, flags) & (SF_IMMUTABLE | SF_APPEND)) && 359 securelevel > 0) 360 return (EPERM); 361 DIP_ASSIGN(ip, flags, vap->va_flags); 362 } else { 363 if (DIP(ip, flags) & (SF_IMMUTABLE | SF_APPEND) || 364 (vap->va_flags & UF_SETTABLE) != vap->va_flags) 365 return (EPERM); 366 DIP_AND(ip, flags, SF_SETTABLE); 367 DIP_OR(ip, flags, vap->va_flags & UF_SETTABLE); 368 } 369 ip->i_flag |= IN_CHANGE; 370 if (vap->va_flags & (IMMUTABLE | APPEND)) 371 return (0); 372 } 373 if (DIP(ip, flags) & (IMMUTABLE | APPEND)) 374 return (EPERM); 375 /* 376 * Go through the fields and update if not VNOVAL. 377 */ 378 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 379 if (vp->v_mount->mnt_flag & MNT_RDONLY) 380 return (EROFS); 381 error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred); 382 if (error) 383 return (error); 384 } 385 if (vap->va_size != VNOVAL) { 386 oldsize = DIP(ip, size); 387 /* 388 * Disallow write attempts on read-only file systems; 389 * unless the file is a socket, fifo, or a block or 390 * character device resident on the file system. 391 */ 392 switch (vp->v_type) { 393 case VDIR: 394 return (EISDIR); 395 case VLNK: 396 case VREG: 397 if (vp->v_mount->mnt_flag & MNT_RDONLY) 398 return (EROFS); 399 break; 400 default: 401 break; 402 } 403 if ((error = UFS_TRUNCATE(ip, vap->va_size, 0, cred)) != 0) 404 return (error); 405 if (vap->va_size < oldsize) 406 hint |= NOTE_TRUNCATE; 407 } 408 if ((vap->va_vaflags & VA_UTIMES_CHANGE) || 409 vap->va_atime.tv_nsec != VNOVAL || 410 vap->va_mtime.tv_nsec != VNOVAL) { 411 if (vp->v_mount->mnt_flag & MNT_RDONLY) 412 return (EROFS); 413 if (cred->cr_uid != DIP(ip, uid) && 414 !vnoperm(vp) && 415 (error = suser_ucred(cred)) && 416 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || 417 (error = VOP_ACCESS(vp, VWRITE, cred, ap->a_p)))) 418 return (error); 419 if (vap->va_mtime.tv_nsec != VNOVAL) 420 ip->i_flag |= IN_CHANGE | IN_UPDATE; 421 else if (vap->va_vaflags & VA_UTIMES_CHANGE) 422 ip->i_flag |= IN_CHANGE; 423 if (vap->va_atime.tv_nsec != VNOVAL) { 424 if (!(vp->v_mount->mnt_flag & MNT_NOATIME) || 425 (ip->i_flag & (IN_CHANGE | IN_UPDATE))) 426 ip->i_flag |= IN_ACCESS; 427 } 428 ufs_itimes(vp); 429 if (vap->va_mtime.tv_nsec != VNOVAL) { 430 DIP_ASSIGN(ip, mtime, vap->va_mtime.tv_sec); 431 DIP_ASSIGN(ip, mtimensec, vap->va_mtime.tv_nsec); 432 } 433 if (vap->va_atime.tv_nsec != VNOVAL) { 434 DIP_ASSIGN(ip, atime, vap->va_atime.tv_sec); 435 DIP_ASSIGN(ip, atimensec, vap->va_atime.tv_nsec); 436 } 437 error = UFS_UPDATE(ip, 0); 438 if (error) 439 return (error); 440 } 441 error = 0; 442 if (vap->va_mode != (mode_t)VNOVAL) { 443 if (vp->v_mount->mnt_flag & MNT_RDONLY) 444 return (EROFS); 445 error = ufs_chmod(vp, (int)vap->va_mode, cred); 446 } 447 VN_KNOTE(vp, hint); 448 return (error); 449 } 450 451 /* 452 * Change the mode on a file. 453 * Inode must be locked before calling. 454 */ 455 int 456 ufs_chmod(struct vnode *vp, int mode, struct ucred *cred) 457 { 458 struct inode *ip = VTOI(vp); 459 int error; 460 461 if (cred->cr_uid != DIP(ip, uid) && 462 !vnoperm(vp) && 463 (error = suser_ucred(cred))) 464 return (error); 465 if (cred->cr_uid && !vnoperm(vp)) { 466 if (vp->v_type != VDIR && (mode & S_ISTXT)) 467 return (EFTYPE); 468 if (!groupmember(DIP(ip, gid), cred) && (mode & ISGID)) 469 return (EPERM); 470 } 471 DIP_AND(ip, mode, ~ALLPERMS); 472 DIP_OR(ip, mode, mode & ALLPERMS); 473 ip->i_flag |= IN_CHANGE; 474 if ((vp->v_flag & VTEXT) && (DIP(ip, mode) & S_ISTXT) == 0) 475 (void) uvm_vnp_uncache(vp); 476 return (0); 477 } 478 479 /* 480 * Perform chown operation on inode ip; 481 * inode must be locked prior to call. 482 */ 483 int 484 ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) 485 { 486 struct inode *ip = VTOI(vp); 487 uid_t ouid; 488 gid_t ogid; 489 int error = 0; 490 daddr_t change; 491 enum ufs_quota_flags quota_flags = 0; 492 493 if (uid == (uid_t)VNOVAL) 494 uid = DIP(ip, uid); 495 if (gid == (gid_t)VNOVAL) 496 gid = DIP(ip, gid); 497 /* 498 * If we don't own the file, are trying to change the owner 499 * of the file, or are not a member of the target group, 500 * the caller must be superuser or the call fails. 501 */ 502 if ((cred->cr_uid != DIP(ip, uid) || uid != DIP(ip, uid) || 503 (gid != DIP(ip, gid) && !groupmember(gid, cred))) && 504 !vnoperm(vp) && 505 (error = suser_ucred(cred))) 506 return (error); 507 ogid = DIP(ip, gid); 508 ouid = DIP(ip, uid); 509 change = DIP(ip, blocks); 510 511 if (ouid == uid) 512 quota_flags |= UFS_QUOTA_NOUID; 513 514 if (ogid == gid) 515 quota_flags |= UFS_QUOTA_NOGID; 516 517 if ((error = getinoquota(ip)) != 0) 518 return (error); 519 (void) ufs_quota_free_blocks2(ip, change, cred, quota_flags); 520 (void) ufs_quota_free_inode2(ip, cred, quota_flags); 521 (void) ufs_quota_delete(ip); 522 523 DIP_ASSIGN(ip, gid, gid); 524 DIP_ASSIGN(ip, uid, uid); 525 526 if ((error = getinoquota(ip)) != 0) 527 goto error; 528 529 if ((error = ufs_quota_alloc_blocks2(ip, change, cred, 530 quota_flags)) != 0) 531 goto error; 532 533 if ((error = ufs_quota_alloc_inode2(ip, cred , 534 quota_flags)) != 0) { 535 (void)ufs_quota_free_blocks2(ip, change, cred, 536 quota_flags); 537 goto error; 538 } 539 540 if (getinoquota(ip)) 541 panic("chown: lost quota"); 542 543 if (ouid != uid || ogid != gid) 544 ip->i_flag |= IN_CHANGE; 545 if (!vnoperm(vp)) { 546 if (ouid != uid && cred->cr_uid != 0) 547 DIP_AND(ip, mode, ~ISUID); 548 if (ogid != gid && cred->cr_uid != 0) 549 DIP_AND(ip, mode, ~ISGID); 550 } 551 return (0); 552 553 error: 554 (void) ufs_quota_delete(ip); 555 556 DIP_ASSIGN(ip, gid, ogid); 557 DIP_ASSIGN(ip, uid, ouid); 558 559 if (getinoquota(ip) == 0) { 560 (void) ufs_quota_alloc_blocks2(ip, change, cred, 561 quota_flags | UFS_QUOTA_FORCE); 562 (void) ufs_quota_alloc_inode2(ip, cred, 563 quota_flags | UFS_QUOTA_FORCE); 564 (void) getinoquota(ip); 565 } 566 return (error); 567 568 } 569 570 int 571 ufs_ioctl(void *v) 572 { 573 #if 0 574 struct vop_ioctl_args *ap = v; 575 #endif 576 return (ENOTTY); 577 } 578 579 int 580 ufs_remove(void *v) 581 { 582 struct vop_remove_args *ap = v; 583 struct inode *ip; 584 struct vnode *vp = ap->a_vp; 585 struct vnode *dvp = ap->a_dvp; 586 int error; 587 588 ip = VTOI(vp); 589 if (vp->v_type == VDIR || (DIP(ip, flags) & (IMMUTABLE | APPEND)) || 590 (DIP(VTOI(dvp), flags) & APPEND)) { 591 error = EPERM; 592 goto out; 593 } 594 error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); 595 VN_KNOTE(vp, NOTE_DELETE); 596 VN_KNOTE(dvp, NOTE_WRITE); 597 out: 598 return (error); 599 } 600 601 /* 602 * link vnode call 603 */ 604 int 605 ufs_link(void *v) 606 { 607 struct vop_link_args *ap = v; 608 struct vnode *dvp = ap->a_dvp; 609 struct vnode *vp = ap->a_vp; 610 struct componentname *cnp = ap->a_cnp; 611 struct inode *ip; 612 struct direct newdir; 613 int error; 614 615 #ifdef DIAGNOSTIC 616 if ((cnp->cn_flags & HASBUF) == 0) 617 panic("ufs_link: no name"); 618 #endif 619 if (dvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE))) { 620 VOP_ABORTOP(dvp, cnp); 621 goto out2; 622 } 623 ip = VTOI(vp); 624 if ((nlink_t) DIP(ip, nlink) >= LINK_MAX) { 625 VOP_ABORTOP(dvp, cnp); 626 error = EMLINK; 627 goto out1; 628 } 629 if (DIP(ip, flags) & (IMMUTABLE | APPEND)) { 630 VOP_ABORTOP(dvp, cnp); 631 error = EPERM; 632 goto out1; 633 } 634 ip->i_effnlink++; 635 DIP_ADD(ip, nlink, 1); 636 ip->i_flag |= IN_CHANGE; 637 if ((error = UFS_UPDATE(ip, 1)) == 0) { 638 ufs_makedirentry(ip, cnp, &newdir); 639 error = ufs_direnter(dvp, vp, &newdir, cnp, NULL); 640 } 641 if (error) { 642 ip->i_effnlink--; 643 DIP_ADD(ip, nlink, -1); 644 ip->i_flag |= IN_CHANGE; 645 } 646 pool_put(&namei_pool, cnp->cn_pnbuf); 647 VN_KNOTE(vp, NOTE_LINK); 648 VN_KNOTE(dvp, NOTE_WRITE); 649 out1: 650 if (dvp != vp) 651 VOP_UNLOCK(vp); 652 out2: 653 vput(dvp); 654 return (error); 655 } 656 657 /* 658 * Rename system call. 659 * rename("foo", "bar"); 660 * is essentially 661 * unlink("bar"); 662 * link("foo", "bar"); 663 * unlink("foo"); 664 * but ``atomically''. Can't do full commit without saving state in the 665 * inode on disk which isn't feasible at this time. Best we can do is 666 * always guarantee the target exists. 667 * 668 * Basic algorithm is: 669 * 670 * 1) Bump link count on source while we're linking it to the 671 * target. This also ensure the inode won't be deleted out 672 * from underneath us while we work (it may be truncated by 673 * a concurrent `trunc' or `open' for creation). 674 * 2) Link source to destination. If destination already exists, 675 * delete it first. 676 * 3) Unlink source reference to inode if still around. If a 677 * directory was moved and the parent of the destination 678 * is different from the source, patch the ".." entry in the 679 * directory. 680 */ 681 int 682 ufs_rename(void *v) 683 { 684 struct vop_rename_args *ap = v; 685 struct vnode *tvp = ap->a_tvp; 686 struct vnode *tdvp = ap->a_tdvp; 687 struct vnode *fvp = ap->a_fvp; 688 struct vnode *fdvp = ap->a_fdvp; 689 struct componentname *tcnp = ap->a_tcnp; 690 struct componentname *fcnp = ap->a_fcnp; 691 struct inode *ip, *xp, *dp; 692 struct direct newdir; 693 int doingdirectory = 0, oldparent = 0, newparent = 0; 694 int error = 0; 695 696 #ifdef DIAGNOSTIC 697 if ((tcnp->cn_flags & HASBUF) == 0 || 698 (fcnp->cn_flags & HASBUF) == 0) 699 panic("ufs_rename: no name"); 700 #endif 701 /* 702 * Check for cross-device rename. 703 */ 704 if ((fvp->v_mount != tdvp->v_mount) || 705 (tvp && (fvp->v_mount != tvp->v_mount))) { 706 error = EXDEV; 707 abortit: 708 VOP_ABORTOP(tdvp, tcnp); 709 if (tdvp == tvp) 710 vrele(tdvp); 711 else 712 vput(tdvp); 713 if (tvp) 714 vput(tvp); 715 VOP_ABORTOP(fdvp, fcnp); 716 vrele(fdvp); 717 vrele(fvp); 718 return (error); 719 } 720 721 if (tvp && ((DIP(VTOI(tvp), flags) & (IMMUTABLE | APPEND)) || 722 (DIP(VTOI(tdvp), flags) & APPEND))) { 723 error = EPERM; 724 goto abortit; 725 } 726 727 /* 728 * Check if just deleting a link name or if we've lost a race. 729 * If another process completes the same rename after we've looked 730 * up the source and have blocked looking up the target, then the 731 * source and target inodes may be identical now although the 732 * names were never linked. 733 */ 734 if (fvp == tvp) { 735 if (fvp->v_type == VDIR) { 736 /* 737 * Linked directories are impossible, so we must 738 * have lost the race. Pretend that the rename 739 * completed before the lookup. 740 */ 741 error = ENOENT; 742 goto abortit; 743 } 744 745 /* Release destination completely. */ 746 VOP_ABORTOP(tdvp, tcnp); 747 vput(tdvp); 748 vput(tvp); 749 750 /* 751 * Delete source. There is another race now that everything 752 * is unlocked, but this doesn't cause any new complications. 753 * relookup() may find a file that is unrelated to the 754 * original one, or it may fail. Too bad. 755 */ 756 vrele(fvp); 757 fcnp->cn_flags &= ~MODMASK; 758 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 759 if ((fcnp->cn_flags & SAVESTART) == 0) 760 panic("ufs_rename: lost from startdir"); 761 fcnp->cn_nameiop = DELETE; 762 if ((error = vfs_relookup(fdvp, &fvp, fcnp)) != 0) 763 return (error); /* relookup did vrele() */ 764 vrele(fdvp); 765 return (VOP_REMOVE(fdvp, fvp, fcnp)); 766 } 767 768 if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) 769 goto abortit; 770 771 /* fvp, tdvp, tvp now locked */ 772 dp = VTOI(fdvp); 773 ip = VTOI(fvp); 774 if ((nlink_t) DIP(ip, nlink) >= LINK_MAX) { 775 VOP_UNLOCK(fvp); 776 error = EMLINK; 777 goto abortit; 778 } 779 if ((DIP(ip, flags) & (IMMUTABLE | APPEND)) || 780 (DIP(dp, flags) & APPEND)) { 781 VOP_UNLOCK(fvp); 782 error = EPERM; 783 goto abortit; 784 } 785 if ((DIP(ip, mode) & IFMT) == IFDIR) { 786 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); 787 if (!error && tvp) 788 error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); 789 if (error) { 790 VOP_UNLOCK(fvp); 791 error = EACCES; 792 goto abortit; 793 } 794 /* 795 * Avoid ".", "..", and aliases of "." for obvious reasons. 796 */ 797 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 798 dp == ip || 799 (fcnp->cn_flags & ISDOTDOT) || 800 (tcnp->cn_flags & ISDOTDOT) || 801 (ip->i_flag & IN_RENAME)) { 802 VOP_UNLOCK(fvp); 803 error = EINVAL; 804 goto abortit; 805 } 806 ip->i_flag |= IN_RENAME; 807 oldparent = dp->i_number; 808 doingdirectory = 1; 809 } 810 VN_KNOTE(fdvp, NOTE_WRITE); /* XXX right place? */ 811 812 /* 813 * When the target exists, both the directory 814 * and target vnodes are returned locked. 815 */ 816 dp = VTOI(tdvp); 817 xp = NULL; 818 if (tvp) 819 xp = VTOI(tvp); 820 821 /* 822 * 1) Bump link count while we're moving stuff 823 * around. If we crash somewhere before 824 * completing our work, the link count 825 * may be wrong, but correctable. 826 */ 827 ip->i_effnlink++; 828 DIP_ADD(ip, nlink, 1); 829 ip->i_flag |= IN_CHANGE; 830 if ((error = UFS_UPDATE(ip, 1)) != 0) { 831 VOP_UNLOCK(fvp); 832 goto bad; 833 } 834 835 /* 836 * If ".." must be changed (ie the directory gets a new 837 * parent) then the source directory must not be in the 838 * directory hierarchy above the target, as this would 839 * orphan everything below the source directory. Also 840 * the user must have write permission in the source so 841 * as to be able to change "..". We must repeat the call 842 * to namei, as the parent directory is unlocked by the 843 * call to checkpath(). 844 */ 845 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); 846 VOP_UNLOCK(fvp); 847 848 /* tdvp and tvp locked */ 849 if (oldparent != dp->i_number) 850 newparent = dp->i_number; 851 if (doingdirectory && newparent) { 852 if (error) /* write access check above */ 853 goto bad; 854 if (xp != NULL) 855 vput(tvp); 856 /* 857 * Compensate for the reference ufs_checkpath() loses. 858 */ 859 vref(tdvp); 860 /* Only tdvp is locked */ 861 if ((error = ufs_checkpath(ip, dp, tcnp->cn_cred)) != 0) { 862 vrele(tdvp); 863 goto out; 864 } 865 if ((tcnp->cn_flags & SAVESTART) == 0) 866 panic("ufs_rename: lost to startdir"); 867 if ((error = vfs_relookup(tdvp, &tvp, tcnp)) != 0) 868 goto out; 869 vrele(tdvp); /* relookup() acquired a reference */ 870 dp = VTOI(tdvp); 871 xp = NULL; 872 if (tvp) 873 xp = VTOI(tvp); 874 } 875 /* 876 * 2) If target doesn't exist, link the target 877 * to the source and unlink the source. 878 * Otherwise, rewrite the target directory 879 * entry to reference the source inode and 880 * expunge the original entry's existence. 881 */ 882 if (xp == NULL) { 883 if (dp->i_dev != ip->i_dev) 884 panic("rename: EXDEV"); 885 /* 886 * Account for ".." in new directory. 887 * When source and destination have the same 888 * parent we don't fool with the link count. 889 */ 890 if (doingdirectory && newparent) { 891 if ((nlink_t) DIP(dp, nlink) >= LINK_MAX) { 892 error = EMLINK; 893 goto bad; 894 } 895 dp->i_effnlink++; 896 DIP_ADD(dp, nlink, 1); 897 dp->i_flag |= IN_CHANGE; 898 if ((error = UFS_UPDATE(dp, 1)) 899 != 0) { 900 dp->i_effnlink--; 901 DIP_ADD(dp, nlink, -1); 902 dp->i_flag |= IN_CHANGE; 903 goto bad; 904 } 905 } 906 ufs_makedirentry(ip, tcnp, &newdir); 907 if ((error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL)) != 0) { 908 if (doingdirectory && newparent) { 909 dp->i_effnlink--; 910 DIP_ADD(dp, nlink, -1); 911 dp->i_flag |= IN_CHANGE; 912 (void)UFS_UPDATE(dp, 1); 913 } 914 goto bad; 915 } 916 VN_KNOTE(tdvp, NOTE_WRITE); 917 vput(tdvp); 918 } else { 919 if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) 920 panic("rename: EXDEV"); 921 /* 922 * Short circuit rename(foo, foo). 923 */ 924 if (xp->i_number == ip->i_number) 925 panic("ufs_rename: same file"); 926 /* 927 * If the parent directory is "sticky", then the user must 928 * own the parent directory, or the destination of the rename, 929 * otherwise the destination may not be changed (except by 930 * root). This implements append-only directories. 931 */ 932 if ((DIP(dp, mode) & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && 933 tcnp->cn_cred->cr_uid != DIP(dp, uid) && 934 DIP(xp, uid )!= tcnp->cn_cred->cr_uid && 935 !vnoperm(tdvp)) { 936 error = EPERM; 937 goto bad; 938 } 939 /* 940 * Target must be empty if a directory and have no links 941 * to it. Also, ensure source and target are compatible 942 * (both directories, or both not directories). 943 */ 944 if ((DIP(xp, mode) & IFMT) == IFDIR) { 945 if (xp->i_effnlink > 2 || 946 !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { 947 error = ENOTEMPTY; 948 goto bad; 949 } 950 if (!doingdirectory) { 951 error = ENOTDIR; 952 goto bad; 953 } 954 cache_purge(tdvp); 955 } else if (doingdirectory) { 956 error = EISDIR; 957 goto bad; 958 } 959 960 if ((error = ufs_dirrewrite(dp, xp, ip->i_number, 961 IFTODT(DIP(ip, mode)), (doingdirectory && newparent) ? 962 newparent : doingdirectory)) != 0) 963 goto bad; 964 if (doingdirectory) { 965 if (!newparent) { 966 dp->i_effnlink--; 967 } 968 xp->i_effnlink--; 969 } 970 if (doingdirectory) { 971 /* 972 * Truncate inode. The only stuff left in the directory 973 * is "." and "..". The "." reference is inconsequential 974 * since we are quashing it. We have removed the "." 975 * reference and the reference in the parent directory, 976 * but there may be other hard links. The soft 977 * dependency code will arrange to do these operations 978 * after the parent directory entry has been deleted on 979 * disk, so when running with that code we avoid doing 980 * them now. 981 */ 982 if (!newparent) { 983 DIP_ADD(dp, nlink, -1); 984 dp->i_flag |= IN_CHANGE; 985 } 986 987 DIP_ADD(xp, nlink, -1); 988 xp->i_flag |= IN_CHANGE; 989 if ((error = UFS_TRUNCATE(VTOI(tvp), (off_t)0, IO_SYNC, 990 tcnp->cn_cred)) != 0) 991 goto bad; 992 } 993 VN_KNOTE(tdvp, NOTE_WRITE); 994 vput(tdvp); 995 VN_KNOTE(tvp, NOTE_DELETE); 996 vput(tvp); 997 xp = NULL; 998 } 999 1000 /* 1001 * 3) Unlink the source. 1002 */ 1003 fcnp->cn_flags &= ~MODMASK; 1004 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1005 if ((fcnp->cn_flags & SAVESTART) == 0) 1006 panic("ufs_rename: lost from startdir"); 1007 if ((error = vfs_relookup(fdvp, &fvp, fcnp)) != 0) { 1008 vrele(ap->a_fvp); 1009 return (error); 1010 } 1011 vrele(fdvp); 1012 if (fvp == NULL) { 1013 /* 1014 * From name has disappeared. 1015 */ 1016 if (doingdirectory) 1017 panic("ufs_rename: lost dir entry"); 1018 vrele(ap->a_fvp); 1019 return (0); 1020 } 1021 1022 xp = VTOI(fvp); 1023 dp = VTOI(fdvp); 1024 1025 /* 1026 * Ensure that the directory entry still exists and has not 1027 * changed while the new name has been entered. If the source is 1028 * a file then the entry may have been unlinked or renamed. In 1029 * either case there is no further work to be done. If the source 1030 * is a directory then it cannot have been rmdir'ed; the IN_RENAME 1031 * flag ensures that it cannot be moved by another rename or removed 1032 * by a rmdir. 1033 */ 1034 if (xp != ip) { 1035 if (doingdirectory) 1036 panic("ufs_rename: lost dir entry"); 1037 } else { 1038 /* 1039 * If the source is a directory with a 1040 * new parent, the link count of the old 1041 * parent directory must be decremented 1042 * and ".." set to point to the new parent. 1043 */ 1044 if (doingdirectory && newparent) { 1045 xp->i_offset = mastertemplate.dot_reclen; 1046 ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); 1047 cache_purge(fdvp); 1048 } 1049 error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); 1050 xp->i_flag &= ~IN_RENAME; 1051 } 1052 VN_KNOTE(fvp, NOTE_RENAME); 1053 if (dp) 1054 vput(fdvp); 1055 if (xp) 1056 vput(fvp); 1057 vrele(ap->a_fvp); 1058 return (error); 1059 1060 bad: 1061 if (xp) 1062 vput(ITOV(xp)); 1063 vput(ITOV(dp)); 1064 out: 1065 vrele(fdvp); 1066 if (doingdirectory) 1067 ip->i_flag &= ~IN_RENAME; 1068 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { 1069 ip->i_effnlink--; 1070 DIP_ADD(ip, nlink, -1); 1071 ip->i_flag |= IN_CHANGE; 1072 ip->i_flag &= ~IN_RENAME; 1073 vput(fvp); 1074 } else 1075 vrele(fvp); 1076 return (error); 1077 } 1078 1079 /* 1080 * Mkdir system call 1081 */ 1082 int 1083 ufs_mkdir(void *v) 1084 { 1085 struct vop_mkdir_args *ap = v; 1086 struct vnode *dvp = ap->a_dvp; 1087 struct vattr *vap = ap->a_vap; 1088 struct componentname *cnp = ap->a_cnp; 1089 struct inode *ip, *dp; 1090 struct vnode *tvp; 1091 struct buf *bp; 1092 struct direct newdir; 1093 struct dirtemplate dirtemplate; 1094 int error, dmode; 1095 1096 #ifdef DIAGNOSTIC 1097 if ((cnp->cn_flags & HASBUF) == 0) 1098 panic("ufs_mkdir: no name"); 1099 #endif 1100 dp = VTOI(dvp); 1101 if ((nlink_t) DIP(dp, nlink) >= LINK_MAX) { 1102 error = EMLINK; 1103 goto out; 1104 } 1105 dmode = vap->va_mode & 0777; 1106 dmode |= IFDIR; 1107 /* 1108 * Must simulate part of ufs_makeinode here to acquire the inode, 1109 * but not have it entered in the parent directory. The entry is 1110 * made later after writing "." and ".." entries. 1111 */ 1112 if ((error = UFS_INODE_ALLOC(dp, dmode, cnp->cn_cred, &tvp)) != 0) 1113 goto out; 1114 1115 ip = VTOI(tvp); 1116 1117 DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid); 1118 DIP_ASSIGN(ip, gid, DIP(dp, gid)); 1119 1120 if ((error = getinoquota(ip)) || 1121 (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) { 1122 pool_put(&namei_pool, cnp->cn_pnbuf); 1123 UFS_INODE_FREE(ip, ip->i_number, dmode); 1124 vput(tvp); 1125 vput(dvp); 1126 return (error); 1127 } 1128 1129 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1130 DIP_ASSIGN(ip, mode, dmode); 1131 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 1132 ip->i_effnlink = 2; 1133 DIP_ASSIGN(ip, nlink, 2); 1134 1135 /* 1136 * Bump link count in parent directory to reflect work done below. 1137 * Should be done before reference is create so cleanup is 1138 * possible if we crash. 1139 */ 1140 dp->i_effnlink++; 1141 DIP_ADD(dp, nlink, 1); 1142 dp->i_flag |= IN_CHANGE; 1143 if ((error = UFS_UPDATE(dp, 1)) != 0) 1144 goto bad; 1145 1146 /* 1147 * Initialize directory with "." and ".." from static template. 1148 */ 1149 dirtemplate = mastertemplate; 1150 dirtemplate.dot_ino = ip->i_number; 1151 dirtemplate.dotdot_ino = dp->i_number; 1152 1153 if ((error = UFS_BUF_ALLOC(ip, (off_t)0, DIRBLKSIZ, cnp->cn_cred, 1154 B_CLRBUF, &bp)) != 0) 1155 goto bad; 1156 DIP_ASSIGN(ip, size, DIRBLKSIZ); 1157 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1158 uvm_vnp_setsize(tvp, DIP(ip, size)); 1159 memcpy(bp->b_data, &dirtemplate, sizeof(dirtemplate)); 1160 if ((error = UFS_UPDATE(ip, 1)) != 0) { 1161 (void)VOP_BWRITE(bp); 1162 goto bad; 1163 } 1164 1165 /* 1166 * Directory set up, now install its entry in the parent directory. 1167 * 1168 * If we are not doing soft dependencies, then we must write out the 1169 * buffer containing the new directory body before entering the new 1170 * name in the parent. If we are doing soft dependencies, then the 1171 * buffer containing the new directory body will be passed to and 1172 * released in the soft dependency code after the code has attached 1173 * an appropriate ordering dependency to the buffer which ensures that 1174 * the buffer is written before the new name is written in the parent. 1175 */ 1176 if ((error = VOP_BWRITE(bp)) != 0) 1177 goto bad; 1178 ufs_makedirentry(ip, cnp, &newdir); 1179 error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); 1180 1181 bad: 1182 if (error == 0) { 1183 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); 1184 *ap->a_vpp = tvp; 1185 } else { 1186 dp->i_effnlink--; 1187 DIP_ADD(dp, nlink, -1); 1188 dp->i_flag |= IN_CHANGE; 1189 /* 1190 * No need to do an explicit VOP_TRUNCATE here, vrele will 1191 * do this for us because we set the link count to 0. 1192 */ 1193 ip->i_effnlink = 0; 1194 DIP_ASSIGN(ip, nlink, 0); 1195 ip->i_flag |= IN_CHANGE; 1196 vput(tvp); 1197 } 1198 out: 1199 pool_put(&namei_pool, cnp->cn_pnbuf); 1200 vput(dvp); 1201 1202 return (error); 1203 } 1204 1205 /* 1206 * Rmdir system call. 1207 */ 1208 int 1209 ufs_rmdir(void *v) 1210 { 1211 struct vop_rmdir_args *ap = v; 1212 struct vnode *vp = ap->a_vp; 1213 struct vnode *dvp = ap->a_dvp; 1214 struct componentname *cnp = ap->a_cnp; 1215 struct inode *ip, *dp; 1216 int error; 1217 1218 ip = VTOI(vp); 1219 dp = VTOI(dvp); 1220 /* 1221 * Do not remove a directory that is in the process of being renamed. 1222 * Verify the directory is empty (and valid). Rmdir ".." will not be 1223 * valid since ".." will contain a reference to the current directory 1224 * and thus be non-empty. 1225 */ 1226 error = 0; 1227 if (ip->i_flag & IN_RENAME) { 1228 error = EINVAL; 1229 goto out; 1230 } 1231 if (ip->i_effnlink != 2 || 1232 !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { 1233 error = ENOTEMPTY; 1234 goto out; 1235 } 1236 if ((DIP(dp, flags) & APPEND) || 1237 (DIP(ip, flags) & (IMMUTABLE | APPEND))) { 1238 error = EPERM; 1239 goto out; 1240 } 1241 /* 1242 * Delete reference to directory before purging 1243 * inode. If we crash in between, the directory 1244 * will be reattached to lost+found, 1245 */ 1246 dp->i_effnlink--; 1247 ip->i_effnlink--; 1248 if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0) { 1249 dp->i_effnlink++; 1250 ip->i_effnlink++; 1251 goto out; 1252 } 1253 1254 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); 1255 cache_purge(dvp); 1256 /* 1257 * Truncate inode. The only stuff left in the directory is "." and 1258 * "..". The "." reference is inconsequential since we are quashing 1259 * it. 1260 */ 1261 DIP_ADD(dp, nlink, -1); 1262 dp->i_flag |= IN_CHANGE; 1263 DIP_ADD(ip, nlink, -1); 1264 ip->i_flag |= IN_CHANGE; 1265 error = UFS_TRUNCATE(ip, (off_t)0, DOINGASYNC(vp) ? 0 : IO_SYNC, 1266 cnp->cn_cred); 1267 1268 cache_purge(vp); 1269 #ifdef UFS_DIRHASH 1270 /* Kill any active hash; i_effnlink == 0, so it will not come back. */ 1271 if (ip->i_dirhash != NULL) 1272 ufsdirhash_free(ip); 1273 #endif 1274 1275 out: 1276 VN_KNOTE(vp, NOTE_DELETE); 1277 vput(dvp); 1278 vput(vp); 1279 return (error); 1280 } 1281 1282 /* 1283 * symlink -- make a symbolic link 1284 */ 1285 int 1286 ufs_symlink(void *v) 1287 { 1288 struct vop_symlink_args *ap = v; 1289 struct vnode *vp, **vpp = ap->a_vpp; 1290 struct inode *ip; 1291 int len, error; 1292 1293 error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, 1294 vpp, ap->a_cnp); 1295 if (error) { 1296 vput(ap->a_dvp); 1297 return (error); 1298 } 1299 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 1300 vput(ap->a_dvp); 1301 vp = *vpp; 1302 ip = VTOI(vp); 1303 len = strlen(ap->a_target); 1304 if (len < ip->i_ump->um_maxsymlinklen) { 1305 memcpy(SHORTLINK(ip), ap->a_target, len); 1306 DIP_ASSIGN(ip, size, len); 1307 ip->i_flag |= IN_CHANGE | IN_UPDATE; 1308 } else 1309 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 1310 UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL, 1311 curproc); 1312 vput(vp); 1313 return (error); 1314 } 1315 1316 /* 1317 * Vnode op for reading directories. 1318 * 1319 * This routine converts the on-disk struct direct entries to the 1320 * struct dirent entries expected by userland and the rest of the kernel. 1321 */ 1322 int 1323 ufs_readdir(void *v) 1324 { 1325 struct vop_readdir_args *ap = v; 1326 struct uio auio, *uio = ap->a_uio; 1327 struct iovec aiov; 1328 union { 1329 struct dirent dn; 1330 char __pad[roundup(sizeof(struct dirent), 8)]; 1331 } u; 1332 off_t off = uio->uio_offset; 1333 struct direct *dp; 1334 char *edp; 1335 caddr_t diskbuf; 1336 size_t count, entries; 1337 int bufsize, readcnt, error; 1338 1339 if (uio->uio_rw != UIO_READ) 1340 return (EINVAL); 1341 1342 count = uio->uio_resid; 1343 entries = (uio->uio_offset + count) & (DIRBLKSIZ - 1); 1344 1345 /* Make sure we don't return partial entries. */ 1346 if (count <= entries) 1347 return (EINVAL); 1348 1349 /* 1350 * Convert and copy back the on-disk struct direct format to 1351 * the user-space struct dirent format, one entry at a time 1352 */ 1353 1354 /* read from disk, stopping on a block boundary, max 64kB */ 1355 readcnt = min(count, 64*1024) - entries; 1356 1357 auio = *uio; 1358 auio.uio_iov = &aiov; 1359 auio.uio_iovcnt = 1; 1360 auio.uio_resid = readcnt; 1361 auio.uio_segflg = UIO_SYSSPACE; 1362 aiov.iov_len = readcnt; 1363 bufsize = readcnt; 1364 diskbuf = malloc(bufsize, M_TEMP, M_WAITOK); 1365 aiov.iov_base = diskbuf; 1366 error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); 1367 readcnt -= auio.uio_resid; 1368 dp = (struct direct *)diskbuf; 1369 edp = &diskbuf[readcnt]; 1370 1371 memset(&u, 0, sizeof(u)); 1372 1373 /* 1374 * While 1375 * - we haven't failed to VOP_READ or uiomove() 1376 * - there's space in the read buf for the head of an entry 1377 * - that entry has a valid d_reclen, and 1378 * - there's space for the *entire* entry 1379 * then we're good to process this one. 1380 */ 1381 while (error == 0 && 1382 (char *)dp + offsetof(struct direct, d_name) < edp && 1383 dp->d_reclen > offsetof(struct direct, d_name) && 1384 (char *)dp + dp->d_reclen <= edp) { 1385 u.dn.d_reclen = roundup(dp->d_namlen+1 + 1386 offsetof(struct dirent, d_name), 8); 1387 if (u.dn.d_reclen > uio->uio_resid) 1388 break; 1389 off += dp->d_reclen; 1390 u.dn.d_off = off; 1391 u.dn.d_fileno = dp->d_ino; 1392 u.dn.d_type = dp->d_type; 1393 u.dn.d_namlen = dp->d_namlen; 1394 memcpy(u.dn.d_name, dp->d_name, u.dn.d_namlen); 1395 memset(u.dn.d_name + u.dn.d_namlen, 0, u.dn.d_reclen 1396 - u.dn.d_namlen - offsetof(struct dirent, d_name)); 1397 1398 if (memchr(u.dn.d_name, '/', u.dn.d_namlen) != NULL) { 1399 error = EINVAL; 1400 break; 1401 } 1402 1403 error = uiomove(&u.dn, u.dn.d_reclen, uio); 1404 dp = (struct direct *)((char *)dp + dp->d_reclen); 1405 } 1406 1407 /* 1408 * If there was room for an entry in what we read but its 1409 * d_reclen is bogus, fail 1410 */ 1411 if ((char *)dp + offsetof(struct direct, d_name) < edp && 1412 dp->d_reclen <= offsetof(struct direct, d_name)) 1413 error = EIO; 1414 free(diskbuf, M_TEMP, bufsize); 1415 1416 uio->uio_offset = off; 1417 *ap->a_eofflag = DIP(VTOI(ap->a_vp), size) <= off; 1418 1419 return (error); 1420 } 1421 1422 /* 1423 * Return target name of a symbolic link 1424 */ 1425 int 1426 ufs_readlink(void *v) 1427 { 1428 struct vop_readlink_args *ap = v; 1429 struct vnode *vp = ap->a_vp; 1430 struct inode *ip = VTOI(vp); 1431 u_int64_t isize; 1432 1433 isize = DIP(ip, size); 1434 if (isize < ip->i_ump->um_maxsymlinklen) 1435 return (uiomove((char *)SHORTLINK(ip), isize, ap->a_uio)); 1436 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 1437 } 1438 1439 /* 1440 * Lock an inode. If its already locked, set the WANT bit and sleep. 1441 */ 1442 int 1443 ufs_lock(void *v) 1444 { 1445 struct vop_lock_args *ap = v; 1446 struct vnode *vp = ap->a_vp; 1447 1448 return rrw_enter(&VTOI(vp)->i_lock, ap->a_flags & LK_RWFLAGS); 1449 } 1450 1451 /* 1452 * Unlock an inode. If WANT bit is on, wakeup. 1453 */ 1454 int 1455 ufs_unlock(void *v) 1456 { 1457 struct vop_unlock_args *ap = v; 1458 struct vnode *vp = ap->a_vp; 1459 1460 rrw_exit(&VTOI(vp)->i_lock); 1461 return 0; 1462 } 1463 1464 /* 1465 * Check for a locked inode. 1466 */ 1467 int 1468 ufs_islocked(void *v) 1469 { 1470 struct vop_islocked_args *ap = v; 1471 1472 return rrw_status(&VTOI(ap->a_vp)->i_lock); 1473 } 1474 1475 /* 1476 * Calculate the logical to physical mapping if not done already, 1477 * then call the device strategy routine. 1478 */ 1479 int 1480 ufs_strategy(void *v) 1481 { 1482 struct vop_strategy_args *ap = v; 1483 struct buf *bp = ap->a_bp; 1484 struct vnode *vp = bp->b_vp; 1485 struct inode *ip; 1486 int error; 1487 int s; 1488 1489 ip = VTOI(vp); 1490 if (vp->v_type == VBLK || vp->v_type == VCHR) 1491 panic("ufs_strategy: spec"); 1492 if (bp->b_blkno == bp->b_lblkno) { 1493 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 1494 NULL); 1495 if (error) { 1496 bp->b_error = error; 1497 bp->b_flags |= B_ERROR; 1498 s = splbio(); 1499 biodone(bp); 1500 splx(s); 1501 return (error); 1502 } 1503 if (bp->b_blkno == -1) 1504 clrbuf(bp); 1505 } 1506 if (bp->b_blkno == -1) { 1507 s = splbio(); 1508 biodone(bp); 1509 splx(s); 1510 return (0); 1511 } 1512 vp = ip->i_devvp; 1513 bp->b_dev = vp->v_rdev; 1514 VOP_STRATEGY(vp, bp); 1515 return (0); 1516 } 1517 1518 /* 1519 * Print out the contents of an inode. 1520 */ 1521 int 1522 ufs_print(void *v) 1523 { 1524 #if defined(DEBUG) || defined(DIAGNOSTIC) || defined(VFSLCKDEBUG) 1525 struct vop_print_args *ap = v; 1526 1527 struct vnode *vp = ap->a_vp; 1528 struct inode *ip = VTOI(vp); 1529 1530 printf("tag VT_UFS, ino %u, on dev %d, %d", ip->i_number, 1531 major(ip->i_dev), minor(ip->i_dev)); 1532 printf(" flags 0x%x, effnlink %d, nlink %d\n", 1533 ip->i_flag, ip->i_effnlink, DIP(ip, nlink)); 1534 printf("\tmode 0%o, owner %d, group %d, size %lld", 1535 DIP(ip, mode), DIP(ip, uid), DIP(ip, gid), DIP(ip, size)); 1536 1537 #ifdef FIFO 1538 if (vp->v_type == VFIFO) 1539 fifo_printinfo(vp); 1540 #endif /* FIFO */ 1541 printf("\n"); 1542 #endif 1543 1544 return (0); 1545 } 1546 1547 /* 1548 * Read wrapper for special devices. 1549 */ 1550 int 1551 ufsspec_read(void *v) 1552 { 1553 struct vop_read_args *ap = v; 1554 1555 /* 1556 * Set access flag. 1557 */ 1558 VTOI(ap->a_vp)->i_flag |= IN_ACCESS; 1559 return (spec_read(ap)); 1560 } 1561 1562 /* 1563 * Write wrapper for special devices. 1564 */ 1565 int 1566 ufsspec_write(void *v) 1567 { 1568 struct vop_write_args *ap = v; 1569 1570 /* 1571 * Set update and change flags. 1572 */ 1573 VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; 1574 return (spec_write(ap)); 1575 } 1576 1577 /* 1578 * Close wrapper for special devices. 1579 * 1580 * Update the times on the inode then do device close. 1581 */ 1582 int 1583 ufsspec_close(void *v) 1584 { 1585 struct vop_close_args *ap = v; 1586 struct vnode *vp = ap->a_vp; 1587 1588 if (vp->v_usecount > 1) 1589 ufs_itimes(vp); 1590 return (spec_close(ap)); 1591 } 1592 1593 #ifdef FIFO 1594 /* 1595 * Read wrapper for fifo's 1596 */ 1597 int 1598 ufsfifo_read(void *v) 1599 { 1600 struct vop_read_args *ap = v; 1601 1602 /* 1603 * Set access flag. 1604 */ 1605 VTOI(ap->a_vp)->i_flag |= IN_ACCESS; 1606 return (fifo_read(ap)); 1607 } 1608 1609 /* 1610 * Write wrapper for fifo's. 1611 */ 1612 int 1613 ufsfifo_write(void *v) 1614 { 1615 struct vop_write_args *ap = v; 1616 1617 /* 1618 * Set update and change flags. 1619 */ 1620 VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; 1621 return (fifo_write(ap)); 1622 } 1623 1624 /* 1625 * Close wrapper for fifo's. 1626 * 1627 * Update the times on the inode then do device close. 1628 */ 1629 int 1630 ufsfifo_close(void *v) 1631 { 1632 struct vop_close_args *ap = v; 1633 struct vnode *vp = ap->a_vp; 1634 1635 if (vp->v_usecount > 1) 1636 ufs_itimes(vp); 1637 return (fifo_close(ap)); 1638 } 1639 #endif /* FIFO */ 1640 1641 /* 1642 * Return POSIX pathconf information applicable to ufs filesystems. 1643 */ 1644 int 1645 ufs_pathconf(void *v) 1646 { 1647 struct vop_pathconf_args *ap = v; 1648 int error = 0; 1649 1650 switch (ap->a_name) { 1651 case _PC_LINK_MAX: 1652 *ap->a_retval = LINK_MAX; 1653 break; 1654 case _PC_NAME_MAX: 1655 *ap->a_retval = NAME_MAX; 1656 break; 1657 case _PC_CHOWN_RESTRICTED: 1658 *ap->a_retval = 1; 1659 break; 1660 case _PC_NO_TRUNC: 1661 *ap->a_retval = 1; 1662 break; 1663 case _PC_ALLOC_SIZE_MIN: 1664 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; 1665 break; 1666 case _PC_FILESIZEBITS: 1667 *ap->a_retval = 64; 1668 break; 1669 case _PC_REC_INCR_XFER_SIZE: 1670 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1671 break; 1672 case _PC_REC_MAX_XFER_SIZE: 1673 *ap->a_retval = -1; /* means ``unlimited'' */ 1674 break; 1675 case _PC_REC_MIN_XFER_SIZE: 1676 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; 1677 break; 1678 case _PC_REC_XFER_ALIGN: 1679 *ap->a_retval = PAGE_SIZE; 1680 break; 1681 case _PC_SYMLINK_MAX: 1682 *ap->a_retval = MAXPATHLEN; 1683 break; 1684 case _PC_2_SYMLINKS: 1685 *ap->a_retval = 1; 1686 break; 1687 case _PC_TIMESTAMP_RESOLUTION: 1688 *ap->a_retval = 1; 1689 break; 1690 default: 1691 error = EINVAL; 1692 break; 1693 } 1694 1695 return (error); 1696 } 1697 1698 /* 1699 * Advisory record locking support 1700 */ 1701 int 1702 ufs_advlock(void *v) 1703 { 1704 struct vop_advlock_args *ap = v; 1705 struct inode *ip = VTOI(ap->a_vp); 1706 1707 return (lf_advlock(&ip->i_lockf, DIP(ip, size), ap->a_id, ap->a_op, 1708 ap->a_fl, ap->a_flags)); 1709 } 1710 1711 /* 1712 * Allocate a new inode. 1713 */ 1714 int 1715 ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, 1716 struct componentname *cnp) 1717 { 1718 struct inode *ip, *pdir; 1719 struct direct newdir; 1720 struct vnode *tvp; 1721 int error; 1722 1723 pdir = VTOI(dvp); 1724 #ifdef DIAGNOSTIC 1725 if ((cnp->cn_flags & HASBUF) == 0) 1726 panic("ufs_makeinode: no name"); 1727 #endif 1728 *vpp = NULL; 1729 if ((mode & IFMT) == 0) 1730 mode |= IFREG; 1731 1732 if ((error = UFS_INODE_ALLOC(pdir, mode, cnp->cn_cred, &tvp)) != 0) { 1733 pool_put(&namei_pool, cnp->cn_pnbuf); 1734 return (error); 1735 } 1736 1737 ip = VTOI(tvp); 1738 1739 DIP_ASSIGN(ip, gid, DIP(pdir, gid)); 1740 DIP_ASSIGN(ip, uid, cnp->cn_cred->cr_uid); 1741 1742 if ((error = getinoquota(ip)) || 1743 (error = ufs_quota_alloc_inode(ip, cnp->cn_cred))) { 1744 pool_put(&namei_pool, cnp->cn_pnbuf); 1745 UFS_INODE_FREE(ip, ip->i_number, mode); 1746 vput(tvp); 1747 return (error); 1748 } 1749 1750 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 1751 DIP_ASSIGN(ip, mode, mode); 1752 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 1753 ip->i_effnlink = 1; 1754 DIP_ASSIGN(ip, nlink, 1); 1755 if ((DIP(ip, mode) & ISGID) && 1756 !groupmember(DIP(ip, gid), cnp->cn_cred) && 1757 !vnoperm(dvp) && 1758 suser_ucred(cnp->cn_cred)) 1759 DIP_AND(ip, mode, ~ISGID); 1760 1761 /* 1762 * Make sure inode goes to disk before directory entry. 1763 */ 1764 if ((error = UFS_UPDATE(ip, 1)) != 0) 1765 goto bad; 1766 1767 ufs_makedirentry(ip, cnp, &newdir); 1768 if ((error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL)) != 0) 1769 goto bad; 1770 1771 if ((cnp->cn_flags & SAVESTART) == 0) 1772 pool_put(&namei_pool, cnp->cn_pnbuf); 1773 *vpp = tvp; 1774 return (0); 1775 1776 bad: 1777 /* 1778 * Write error occurred trying to update the inode 1779 * or the directory so must deallocate the inode. 1780 */ 1781 pool_put(&namei_pool, cnp->cn_pnbuf); 1782 ip->i_effnlink = 0; 1783 DIP_ASSIGN(ip, nlink, 0); 1784 ip->i_flag |= IN_CHANGE; 1785 tvp->v_type = VNON; 1786 vput(tvp); 1787 1788 return (error); 1789 } 1790 1791 const struct filterops ufsread_filtops = { 1792 .f_flags = FILTEROP_ISFD, 1793 .f_attach = NULL, 1794 .f_detach = filt_ufsdetach, 1795 .f_event = filt_ufsread, 1796 }; 1797 1798 const struct filterops ufswrite_filtops = { 1799 .f_flags = FILTEROP_ISFD, 1800 .f_attach = NULL, 1801 .f_detach = filt_ufsdetach, 1802 .f_event = filt_ufswrite, 1803 }; 1804 1805 const struct filterops ufsvnode_filtops = { 1806 .f_flags = FILTEROP_ISFD, 1807 .f_attach = NULL, 1808 .f_detach = filt_ufsdetach, 1809 .f_event = filt_ufsvnode, 1810 }; 1811 1812 int 1813 ufs_kqfilter(void *v) 1814 { 1815 struct vop_kqfilter_args *ap = v; 1816 struct vnode *vp = ap->a_vp; 1817 struct knote *kn = ap->a_kn; 1818 1819 switch (kn->kn_filter) { 1820 case EVFILT_READ: 1821 kn->kn_fop = &ufsread_filtops; 1822 break; 1823 case EVFILT_WRITE: 1824 kn->kn_fop = &ufswrite_filtops; 1825 break; 1826 case EVFILT_VNODE: 1827 kn->kn_fop = &ufsvnode_filtops; 1828 break; 1829 default: 1830 return (EINVAL); 1831 } 1832 1833 kn->kn_hook = (caddr_t)vp; 1834 1835 klist_insert_locked(&vp->v_klist, kn); 1836 1837 return (0); 1838 } 1839 1840 void 1841 filt_ufsdetach(struct knote *kn) 1842 { 1843 struct vnode *vp = (struct vnode *)kn->kn_hook; 1844 1845 klist_remove_locked(&vp->v_klist, kn); 1846 } 1847 1848 int 1849 filt_ufsread(struct knote *kn, long hint) 1850 { 1851 struct vnode *vp = (struct vnode *)kn->kn_hook; 1852 struct inode *ip = VTOI(vp); 1853 1854 /* 1855 * filesystem is gone, so set the EOF flag and schedule 1856 * the knote for deletion. 1857 */ 1858 if (hint == NOTE_REVOKE) { 1859 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 1860 return (1); 1861 } 1862 1863 #ifdef EXT2FS 1864 if (IS_EXT2_VNODE(ip->i_vnode)) 1865 kn->kn_data = ext2fs_size(ip) - foffset(kn->kn_fp); 1866 else 1867 #endif 1868 kn->kn_data = DIP(ip, size) - foffset(kn->kn_fp); 1869 if (kn->kn_data == 0 && kn->kn_sfflags & NOTE_EOF) { 1870 kn->kn_fflags |= NOTE_EOF; 1871 return (1); 1872 } 1873 1874 if (kn->kn_flags & (__EV_POLL | __EV_SELECT)) 1875 return (1); 1876 1877 return (kn->kn_data != 0); 1878 } 1879 1880 int 1881 filt_ufswrite(struct knote *kn, long hint) 1882 { 1883 /* 1884 * filesystem is gone, so set the EOF flag and schedule 1885 * the knote for deletion. 1886 */ 1887 if (hint == NOTE_REVOKE) { 1888 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 1889 return (1); 1890 } 1891 1892 kn->kn_data = 0; 1893 return (1); 1894 } 1895 1896 int 1897 filt_ufsvnode(struct knote *kn, long hint) 1898 { 1899 if (kn->kn_sfflags & hint) 1900 kn->kn_fflags |= hint; 1901 if (hint == NOTE_REVOKE) { 1902 kn->kn_flags |= EV_EOF; 1903 return (1); 1904 } 1905 return (kn->kn_fflags != 0); 1906 } 1907