1 /* $NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Wasabi Systems, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1982, 1986, 1989, 1993, 1995 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)ufs_vnops.c 8.28 (Berkeley) 7/31/95 66 */ 67 68 #include <sys/cdefs.h> 69 __KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.206 2011/11/18 21:18:52 christos Exp $"); 70 71 #if defined(_KERNEL_OPT) 72 #include "opt_ffs.h" 73 #include "opt_quota.h" 74 #endif 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/namei.h> 79 #include <sys/resourcevar.h> 80 #include <sys/kernel.h> 81 #include <sys/file.h> 82 #include <sys/stat.h> 83 #include <sys/buf.h> 84 #include <sys/proc.h> 85 #include <sys/mount.h> 86 #include <sys/vnode.h> 87 #include <sys/malloc.h> 88 #include <sys/dirent.h> 89 #include <sys/lockf.h> 90 #include <sys/kauth.h> 91 #include <sys/wapbl.h> 92 #include <sys/fstrans.h> 93 94 #include <miscfs/specfs/specdev.h> 95 #include <miscfs/fifofs/fifo.h> 96 #include <miscfs/genfs/genfs.h> 97 98 #include <ufs/ufs/inode.h> 99 #include <ufs/ufs/dir.h> 100 #include <ufs/ufs/ufsmount.h> 101 #include <ufs/ufs/ufs_bswap.h> 102 #include <ufs/ufs/ufs_extern.h> 103 #include <ufs/ufs/ufs_wapbl.h> 104 #ifdef UFS_DIRHASH 105 #include <ufs/ufs/dirhash.h> 106 #endif 107 #include <ufs/ext2fs/ext2fs_extern.h> 108 #include <ufs/ext2fs/ext2fs_dir.h> 109 #include <ufs/ffs/ffs_extern.h> 110 #include <ufs/lfs/lfs_extern.h> 111 #include <ufs/lfs/lfs.h> 112 113 #include <uvm/uvm.h> 114 115 __CTASSERT(EXT2FS_MAXNAMLEN == FFS_MAXNAMLEN); 116 __CTASSERT(LFS_MAXNAMLEN == FFS_MAXNAMLEN); 117 118 static int ufs_chmod(struct vnode *, int, kauth_cred_t, struct lwp *); 119 static int ufs_chown(struct vnode *, uid_t, gid_t, kauth_cred_t, 120 struct lwp *); 121 122 /* 123 * A virgin directory (no blushing please). 124 */ 125 static const struct dirtemplate mastertemplate = { 126 0, 12, DT_DIR, 1, ".", 127 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." 
128 }; 129 130 /* 131 * Create a regular file 132 */ 133 int 134 ufs_create(void *v) 135 { 136 struct vop_create_args /* { 137 struct vnode *a_dvp; 138 struct vnode **a_vpp; 139 struct componentname *a_cnp; 140 struct vattr *a_vap; 141 } */ *ap = v; 142 int error; 143 struct vnode *dvp = ap->a_dvp; 144 struct ufs_lookup_results *ulr; 145 146 /* XXX should handle this material another way */ 147 ulr = &VTOI(dvp)->i_crap; 148 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 149 150 /* 151 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful 152 * ufs_makeinode 153 */ 154 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 155 error = 156 ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), 157 dvp, ulr, ap->a_vpp, ap->a_cnp); 158 if (error) { 159 fstrans_done(dvp->v_mount); 160 return (error); 161 } 162 UFS_WAPBL_END1(dvp->v_mount, dvp); 163 fstrans_done(dvp->v_mount); 164 VN_KNOTE(dvp, NOTE_WRITE); 165 return (0); 166 } 167 168 /* 169 * Mknod vnode call 170 */ 171 /* ARGSUSED */ 172 int 173 ufs_mknod(void *v) 174 { 175 struct vop_mknod_args /* { 176 struct vnode *a_dvp; 177 struct vnode **a_vpp; 178 struct componentname *a_cnp; 179 struct vattr *a_vap; 180 } */ *ap = v; 181 struct vattr *vap; 182 struct vnode **vpp; 183 struct inode *ip; 184 int error; 185 struct mount *mp; 186 ino_t ino; 187 struct ufs_lookup_results *ulr; 188 189 vap = ap->a_vap; 190 vpp = ap->a_vpp; 191 192 /* XXX should handle this material another way */ 193 ulr = &VTOI(ap->a_dvp)->i_crap; 194 UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp)); 195 196 /* 197 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful 198 * ufs_makeinode 199 */ 200 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); 201 if ((error = 202 ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), 203 ap->a_dvp, ulr, vpp, ap->a_cnp)) != 0) 204 goto out; 205 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 206 ip = VTOI(*vpp); 207 mp = (*vpp)->v_mount; 208 ino = ip->i_number; 209 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 210 if (vap->va_rdev 
!= VNOVAL) { 211 struct ufsmount *ump = ip->i_ump; 212 /* 213 * Want to be able to use this to make badblock 214 * inodes, so don't truncate the dev number. 215 */ 216 if (ump->um_fstype == UFS1) 217 ip->i_ffs1_rdev = ufs_rw32(vap->va_rdev, 218 UFS_MPNEEDSWAP(ump)); 219 else 220 ip->i_ffs2_rdev = ufs_rw64(vap->va_rdev, 221 UFS_MPNEEDSWAP(ump)); 222 } 223 UFS_WAPBL_UPDATE(*vpp, NULL, NULL, 0); 224 UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp); 225 /* 226 * Remove inode so that it will be reloaded by VFS_VGET and 227 * checked to see if it is an alias of an existing entry in 228 * the inode cache. 229 */ 230 (*vpp)->v_type = VNON; 231 VOP_UNLOCK(*vpp); 232 vgone(*vpp); 233 error = VFS_VGET(mp, ino, vpp); 234 out: 235 fstrans_done(ap->a_dvp->v_mount); 236 if (error != 0) { 237 *vpp = NULL; 238 return (error); 239 } 240 return (0); 241 } 242 243 /* 244 * Open called. 245 * 246 * Nothing to do. 247 */ 248 /* ARGSUSED */ 249 int 250 ufs_open(void *v) 251 { 252 struct vop_open_args /* { 253 struct vnode *a_vp; 254 int a_mode; 255 kauth_cred_t a_cred; 256 } */ *ap = v; 257 258 /* 259 * Files marked append-only must be opened for appending. 260 */ 261 if ((VTOI(ap->a_vp)->i_flags & APPEND) && 262 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) 263 return (EPERM); 264 return (0); 265 } 266 267 /* 268 * Close called. 269 * 270 * Update the times on the inode. 
271 */ 272 /* ARGSUSED */ 273 int 274 ufs_close(void *v) 275 { 276 struct vop_close_args /* { 277 struct vnode *a_vp; 278 int a_fflag; 279 kauth_cred_t a_cred; 280 } */ *ap = v; 281 struct vnode *vp; 282 struct inode *ip; 283 284 vp = ap->a_vp; 285 ip = VTOI(vp); 286 fstrans_start(vp->v_mount, FSTRANS_SHARED); 287 if (vp->v_usecount > 1) 288 UFS_ITIMES(vp, NULL, NULL, NULL); 289 fstrans_done(vp->v_mount); 290 return (0); 291 } 292 293 static int 294 ufs_check_possible(struct vnode *vp, struct inode *ip, mode_t mode, 295 kauth_cred_t cred) 296 { 297 #if defined(QUOTA) || defined(QUOTA2) 298 int error; 299 #endif 300 301 /* 302 * Disallow write attempts on read-only file systems; 303 * unless the file is a socket, fifo, or a block or 304 * character device resident on the file system. 305 */ 306 if (mode & VWRITE) { 307 switch (vp->v_type) { 308 case VDIR: 309 case VLNK: 310 case VREG: 311 if (vp->v_mount->mnt_flag & MNT_RDONLY) 312 return (EROFS); 313 #if defined(QUOTA) || defined(QUOTA2) 314 fstrans_start(vp->v_mount, FSTRANS_SHARED); 315 error = chkdq(ip, 0, cred, 0); 316 fstrans_done(vp->v_mount); 317 if (error != 0) 318 return error; 319 #endif 320 break; 321 case VBAD: 322 case VBLK: 323 case VCHR: 324 case VSOCK: 325 case VFIFO: 326 case VNON: 327 default: 328 break; 329 } 330 } 331 332 /* If it is a snapshot, nobody gets access to it. */ 333 if ((ip->i_flags & SF_SNAPSHOT)) 334 return (EPERM); 335 /* If immutable bit set, nobody gets to write it. 
*/ 336 if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) 337 return (EPERM); 338 339 return 0; 340 } 341 342 static int 343 ufs_check_permitted(struct vnode *vp, struct inode *ip, mode_t mode, 344 kauth_cred_t cred) 345 { 346 347 return genfs_can_access(vp->v_type, ip->i_mode & ALLPERMS, ip->i_uid, 348 ip->i_gid, mode, cred); 349 } 350 351 int 352 ufs_access(void *v) 353 { 354 struct vop_access_args /* { 355 struct vnode *a_vp; 356 int a_mode; 357 kauth_cred_t a_cred; 358 } */ *ap = v; 359 struct vnode *vp; 360 struct inode *ip; 361 mode_t mode; 362 int error; 363 364 vp = ap->a_vp; 365 ip = VTOI(vp); 366 mode = ap->a_mode; 367 368 error = ufs_check_possible(vp, ip, mode, ap->a_cred); 369 if (error) 370 return error; 371 372 error = ufs_check_permitted(vp, ip, mode, ap->a_cred); 373 374 return error; 375 } 376 377 /* ARGSUSED */ 378 int 379 ufs_getattr(void *v) 380 { 381 struct vop_getattr_args /* { 382 struct vnode *a_vp; 383 struct vattr *a_vap; 384 kauth_cred_t a_cred; 385 } */ *ap = v; 386 struct vnode *vp; 387 struct inode *ip; 388 struct vattr *vap; 389 390 vp = ap->a_vp; 391 ip = VTOI(vp); 392 vap = ap->a_vap; 393 fstrans_start(vp->v_mount, FSTRANS_SHARED); 394 UFS_ITIMES(vp, NULL, NULL, NULL); 395 396 /* 397 * Copy from inode table 398 */ 399 vap->va_fsid = ip->i_dev; 400 vap->va_fileid = ip->i_number; 401 vap->va_mode = ip->i_mode & ALLPERMS; 402 vap->va_nlink = ip->i_nlink; 403 vap->va_uid = ip->i_uid; 404 vap->va_gid = ip->i_gid; 405 vap->va_size = vp->v_size; 406 if (ip->i_ump->um_fstype == UFS1) { 407 vap->va_rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev, 408 UFS_MPNEEDSWAP(ip->i_ump)); 409 vap->va_atime.tv_sec = ip->i_ffs1_atime; 410 vap->va_atime.tv_nsec = ip->i_ffs1_atimensec; 411 vap->va_mtime.tv_sec = ip->i_ffs1_mtime; 412 vap->va_mtime.tv_nsec = ip->i_ffs1_mtimensec; 413 vap->va_ctime.tv_sec = ip->i_ffs1_ctime; 414 vap->va_ctime.tv_nsec = ip->i_ffs1_ctimensec; 415 vap->va_birthtime.tv_sec = 0; 416 vap->va_birthtime.tv_nsec = 0; 417 vap->va_bytes = 
dbtob((u_quad_t)ip->i_ffs1_blocks); 418 } else { 419 vap->va_rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev, 420 UFS_MPNEEDSWAP(ip->i_ump)); 421 vap->va_atime.tv_sec = ip->i_ffs2_atime; 422 vap->va_atime.tv_nsec = ip->i_ffs2_atimensec; 423 vap->va_mtime.tv_sec = ip->i_ffs2_mtime; 424 vap->va_mtime.tv_nsec = ip->i_ffs2_mtimensec; 425 vap->va_ctime.tv_sec = ip->i_ffs2_ctime; 426 vap->va_ctime.tv_nsec = ip->i_ffs2_ctimensec; 427 vap->va_birthtime.tv_sec = ip->i_ffs2_birthtime; 428 vap->va_birthtime.tv_nsec = ip->i_ffs2_birthnsec; 429 vap->va_bytes = dbtob(ip->i_ffs2_blocks); 430 } 431 vap->va_gen = ip->i_gen; 432 vap->va_flags = ip->i_flags; 433 434 /* this doesn't belong here */ 435 if (vp->v_type == VBLK) 436 vap->va_blocksize = BLKDEV_IOSIZE; 437 else if (vp->v_type == VCHR) 438 vap->va_blocksize = MAXBSIZE; 439 else 440 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; 441 vap->va_type = vp->v_type; 442 vap->va_filerev = ip->i_modrev; 443 fstrans_done(vp->v_mount); 444 return (0); 445 } 446 447 /* 448 * Set attribute vnode op. called from several syscalls 449 */ 450 int 451 ufs_setattr(void *v) 452 { 453 struct vop_setattr_args /* { 454 struct vnode *a_vp; 455 struct vattr *a_vap; 456 kauth_cred_t a_cred; 457 } */ *ap = v; 458 struct vattr *vap; 459 struct vnode *vp; 460 struct inode *ip; 461 kauth_cred_t cred; 462 struct lwp *l; 463 int error; 464 465 vap = ap->a_vap; 466 vp = ap->a_vp; 467 ip = VTOI(vp); 468 cred = ap->a_cred; 469 l = curlwp; 470 471 /* 472 * Check for unsettable attributes. 
473 */ 474 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || 475 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || 476 (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || 477 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { 478 return (EINVAL); 479 } 480 481 fstrans_start(vp->v_mount, FSTRANS_SHARED); 482 483 if (vap->va_flags != VNOVAL) { 484 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 485 error = EROFS; 486 goto out; 487 } 488 if (kauth_cred_geteuid(cred) != ip->i_uid && 489 (error = kauth_authorize_generic(cred, 490 KAUTH_GENERIC_ISSUSER, NULL))) 491 goto out; 492 if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, 493 NULL) == 0) { 494 if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) && 495 kauth_authorize_system(l->l_cred, 496 KAUTH_SYSTEM_CHSYSFLAGS, 0, NULL, NULL, NULL)) { 497 error = EPERM; 498 goto out; 499 } 500 /* Snapshot flag cannot be set or cleared */ 501 if ((vap->va_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) != 502 (ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL))) { 503 error = EPERM; 504 goto out; 505 } 506 error = UFS_WAPBL_BEGIN(vp->v_mount); 507 if (error) 508 goto out; 509 ip->i_flags = vap->va_flags; 510 DIP_ASSIGN(ip, flags, ip->i_flags); 511 } else { 512 if ((ip->i_flags & (SF_IMMUTABLE | SF_APPEND)) || 513 (vap->va_flags & UF_SETTABLE) != vap->va_flags) { 514 error = EPERM; 515 goto out; 516 } 517 if ((ip->i_flags & SF_SETTABLE) != 518 (vap->va_flags & SF_SETTABLE)) { 519 error = EPERM; 520 goto out; 521 } 522 error = UFS_WAPBL_BEGIN(vp->v_mount); 523 if (error) 524 goto out; 525 ip->i_flags &= SF_SETTABLE; 526 ip->i_flags |= (vap->va_flags & UF_SETTABLE); 527 DIP_ASSIGN(ip, flags, ip->i_flags); 528 } 529 ip->i_flag |= IN_CHANGE; 530 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 531 UFS_WAPBL_END(vp->v_mount); 532 if (vap->va_flags & (IMMUTABLE | APPEND)) { 533 error = 0; 534 goto out; 535 } 536 } 537 if (ip->i_flags & (IMMUTABLE | APPEND)) { 538 error = EPERM; 539 goto out; 540 } 541 /* 542 * Go through the fields and 
update iff not VNOVAL. 543 */ 544 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { 545 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 546 error = EROFS; 547 goto out; 548 } 549 error = UFS_WAPBL_BEGIN(vp->v_mount); 550 if (error) 551 goto out; 552 error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, l); 553 UFS_WAPBL_END(vp->v_mount); 554 if (error) 555 goto out; 556 } 557 if (vap->va_size != VNOVAL) { 558 /* 559 * Disallow write attempts on read-only file systems; 560 * unless the file is a socket, fifo, or a block or 561 * character device resident on the file system. 562 */ 563 switch (vp->v_type) { 564 case VDIR: 565 error = EISDIR; 566 goto out; 567 case VCHR: 568 case VBLK: 569 case VFIFO: 570 break; 571 case VREG: 572 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 573 error = EROFS; 574 goto out; 575 } 576 if ((ip->i_flags & SF_SNAPSHOT) != 0) { 577 error = EPERM; 578 goto out; 579 } 580 error = UFS_WAPBL_BEGIN(vp->v_mount); 581 if (error) 582 goto out; 583 /* 584 * When journaling, only truncate one indirect block 585 * at a time. 586 */ 587 if (vp->v_mount->mnt_wapbl) { 588 uint64_t incr = MNINDIR(ip->i_ump) << 589 vp->v_mount->mnt_fs_bshift; /* Power of 2 */ 590 uint64_t base = NDADDR << 591 vp->v_mount->mnt_fs_bshift; 592 while (!error && ip->i_size > base + incr && 593 ip->i_size > vap->va_size + incr) { 594 /* 595 * round down to next full indirect 596 * block boundary. 
597 */ 598 uint64_t nsize = base + 599 ((ip->i_size - base - 1) & 600 ~(incr - 1)); 601 error = UFS_TRUNCATE(vp, nsize, 0, 602 cred); 603 if (error == 0) { 604 UFS_WAPBL_END(vp->v_mount); 605 error = 606 UFS_WAPBL_BEGIN(vp->v_mount); 607 } 608 } 609 } 610 if (!error) 611 error = UFS_TRUNCATE(vp, vap->va_size, 0, cred); 612 UFS_WAPBL_END(vp->v_mount); 613 if (error) 614 goto out; 615 break; 616 default: 617 error = EOPNOTSUPP; 618 goto out; 619 } 620 } 621 ip = VTOI(vp); 622 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || 623 vap->va_birthtime.tv_sec != VNOVAL) { 624 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 625 error = EROFS; 626 goto out; 627 } 628 if ((ip->i_flags & SF_SNAPSHOT) != 0) { 629 error = EPERM; 630 goto out; 631 } 632 error = genfs_can_chtimes(vp, vap->va_vaflags, ip->i_uid, cred); 633 if (error) 634 goto out; 635 error = UFS_WAPBL_BEGIN(vp->v_mount); 636 if (error) 637 goto out; 638 if (vap->va_atime.tv_sec != VNOVAL) 639 if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) 640 ip->i_flag |= IN_ACCESS; 641 if (vap->va_mtime.tv_sec != VNOVAL) { 642 ip->i_flag |= IN_CHANGE | IN_UPDATE; 643 if (vp->v_mount->mnt_flag & MNT_RELATIME) 644 ip->i_flag |= IN_ACCESS; 645 } 646 if (vap->va_birthtime.tv_sec != VNOVAL && 647 ip->i_ump->um_fstype == UFS2) { 648 ip->i_ffs2_birthtime = vap->va_birthtime.tv_sec; 649 ip->i_ffs2_birthnsec = vap->va_birthtime.tv_nsec; 650 } 651 error = UFS_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0); 652 UFS_WAPBL_END(vp->v_mount); 653 if (error) 654 goto out; 655 } 656 error = 0; 657 if (vap->va_mode != (mode_t)VNOVAL) { 658 if (vp->v_mount->mnt_flag & MNT_RDONLY) { 659 error = EROFS; 660 goto out; 661 } 662 if ((ip->i_flags & SF_SNAPSHOT) != 0 && 663 (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | 664 S_IXOTH | S_IWOTH))) { 665 error = EPERM; 666 goto out; 667 } 668 error = UFS_WAPBL_BEGIN(vp->v_mount); 669 if (error) 670 goto out; 671 error = ufs_chmod(vp, (int)vap->va_mode, cred, l); 672 
UFS_WAPBL_END(vp->v_mount); 673 } 674 VN_KNOTE(vp, NOTE_ATTRIB); 675 out: 676 fstrans_done(vp->v_mount); 677 return (error); 678 } 679 680 /* 681 * Change the mode on a file. 682 * Inode must be locked before calling. 683 */ 684 static int 685 ufs_chmod(struct vnode *vp, int mode, kauth_cred_t cred, struct lwp *l) 686 { 687 struct inode *ip; 688 int error; 689 690 UFS_WAPBL_JLOCK_ASSERT(vp->v_mount); 691 692 ip = VTOI(vp); 693 694 error = genfs_can_chmod(vp, cred, ip->i_uid, ip->i_gid, mode); 695 if (error) 696 return (error); 697 698 fstrans_start(vp->v_mount, FSTRANS_SHARED); 699 ip->i_mode &= ~ALLPERMS; 700 ip->i_mode |= (mode & ALLPERMS); 701 ip->i_flag |= IN_CHANGE; 702 DIP_ASSIGN(ip, mode, ip->i_mode); 703 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 704 fstrans_done(vp->v_mount); 705 return (0); 706 } 707 708 /* 709 * Perform chown operation on inode ip; 710 * inode must be locked prior to call. 711 */ 712 static int 713 ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, kauth_cred_t cred, 714 struct lwp *l) 715 { 716 struct inode *ip; 717 int error = 0; 718 #if defined(QUOTA) || defined(QUOTA2) 719 uid_t ouid; 720 gid_t ogid; 721 int64_t change; 722 #endif 723 ip = VTOI(vp); 724 error = 0; 725 726 if (uid == (uid_t)VNOVAL) 727 uid = ip->i_uid; 728 if (gid == (gid_t)VNOVAL) 729 gid = ip->i_gid; 730 731 error = genfs_can_chown(vp, cred, ip->i_uid, ip->i_gid, uid, gid); 732 if (error) 733 return (error); 734 735 fstrans_start(vp->v_mount, FSTRANS_SHARED); 736 #if defined(QUOTA) || defined(QUOTA2) 737 ogid = ip->i_gid; 738 ouid = ip->i_uid; 739 change = DIP(ip, blocks); 740 (void) chkdq(ip, -change, cred, 0); 741 (void) chkiq(ip, -1, cred, 0); 742 #endif 743 ip->i_gid = gid; 744 DIP_ASSIGN(ip, gid, gid); 745 ip->i_uid = uid; 746 DIP_ASSIGN(ip, uid, uid); 747 #if defined(QUOTA) || defined(QUOTA2) 748 if ((error = chkdq(ip, change, cred, 0)) == 0) { 749 if ((error = chkiq(ip, 1, cred, 0)) == 0) 750 goto good; 751 else 752 (void) chkdq(ip, -change, cred, FORCE); 753 } 754 
ip->i_gid = ogid; 755 DIP_ASSIGN(ip, gid, ogid); 756 ip->i_uid = ouid; 757 DIP_ASSIGN(ip, uid, ouid); 758 (void) chkdq(ip, change, cred, FORCE); 759 (void) chkiq(ip, 1, cred, FORCE); 760 fstrans_done(vp->v_mount); 761 return (error); 762 good: 763 #endif /* QUOTA || QUOTA2 */ 764 ip->i_flag |= IN_CHANGE; 765 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 766 fstrans_done(vp->v_mount); 767 return (0); 768 } 769 770 int 771 ufs_remove(void *v) 772 { 773 struct vop_remove_args /* { 774 struct vnode *a_dvp; 775 struct vnode *a_vp; 776 struct componentname *a_cnp; 777 } */ *ap = v; 778 struct vnode *vp, *dvp; 779 struct inode *ip; 780 int error; 781 struct ufs_lookup_results *ulr; 782 783 vp = ap->a_vp; 784 dvp = ap->a_dvp; 785 ip = VTOI(vp); 786 787 /* XXX should handle this material another way */ 788 ulr = &VTOI(dvp)->i_crap; 789 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 790 791 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 792 if (vp->v_type == VDIR || (ip->i_flags & (IMMUTABLE | APPEND)) || 793 (VTOI(dvp)->i_flags & APPEND)) 794 error = EPERM; 795 else { 796 error = UFS_WAPBL_BEGIN(dvp->v_mount); 797 if (error == 0) { 798 error = ufs_dirremove(dvp, ulr, 799 ip, ap->a_cnp->cn_flags, 0); 800 UFS_WAPBL_END(dvp->v_mount); 801 } 802 } 803 VN_KNOTE(vp, NOTE_DELETE); 804 VN_KNOTE(dvp, NOTE_WRITE); 805 if (dvp == vp) 806 vrele(vp); 807 else 808 vput(vp); 809 vput(dvp); 810 fstrans_done(dvp->v_mount); 811 return (error); 812 } 813 814 /* 815 * ufs_link: create hard link. 
816 */ 817 int 818 ufs_link(void *v) 819 { 820 struct vop_link_args /* { 821 struct vnode *a_dvp; 822 struct vnode *a_vp; 823 struct componentname *a_cnp; 824 } */ *ap = v; 825 struct vnode *dvp = ap->a_dvp; 826 struct vnode *vp = ap->a_vp; 827 struct componentname *cnp = ap->a_cnp; 828 struct inode *ip; 829 struct direct *newdir; 830 int error; 831 struct ufs_lookup_results *ulr; 832 833 KASSERT(dvp != vp); 834 KASSERT(vp->v_type != VDIR); 835 KASSERT(dvp->v_mount == vp->v_mount); 836 837 /* XXX should handle this material another way */ 838 ulr = &VTOI(dvp)->i_crap; 839 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 840 841 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 842 error = vn_lock(vp, LK_EXCLUSIVE); 843 if (error) { 844 VOP_ABORTOP(dvp, cnp); 845 goto out2; 846 } 847 ip = VTOI(vp); 848 if ((nlink_t)ip->i_nlink >= LINK_MAX) { 849 VOP_ABORTOP(dvp, cnp); 850 error = EMLINK; 851 goto out1; 852 } 853 if (ip->i_flags & (IMMUTABLE | APPEND)) { 854 VOP_ABORTOP(dvp, cnp); 855 error = EPERM; 856 goto out1; 857 } 858 error = UFS_WAPBL_BEGIN(vp->v_mount); 859 if (error) { 860 VOP_ABORTOP(dvp, cnp); 861 goto out1; 862 } 863 ip->i_nlink++; 864 DIP_ASSIGN(ip, nlink, ip->i_nlink); 865 ip->i_flag |= IN_CHANGE; 866 error = UFS_UPDATE(vp, NULL, NULL, UPDATE_DIROP); 867 if (!error) { 868 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 869 ufs_makedirentry(ip, cnp, newdir); 870 error = ufs_direnter(dvp, ulr, vp, newdir, cnp, NULL); 871 pool_cache_put(ufs_direct_cache, newdir); 872 } 873 if (error) { 874 ip->i_nlink--; 875 DIP_ASSIGN(ip, nlink, ip->i_nlink); 876 ip->i_flag |= IN_CHANGE; 877 UFS_WAPBL_UPDATE(vp, NULL, NULL, UPDATE_DIROP); 878 } 879 UFS_WAPBL_END(vp->v_mount); 880 out1: 881 VOP_UNLOCK(vp); 882 out2: 883 VN_KNOTE(vp, NOTE_LINK); 884 VN_KNOTE(dvp, NOTE_WRITE); 885 vput(dvp); 886 fstrans_done(dvp->v_mount); 887 return (error); 888 } 889 890 /* 891 * whiteout vnode call 892 */ 893 int 894 ufs_whiteout(void *v) 895 { 896 struct vop_whiteout_args /* { 897 struct vnode *a_dvp; 
898 struct componentname *a_cnp; 899 int a_flags; 900 } */ *ap = v; 901 struct vnode *dvp = ap->a_dvp; 902 struct componentname *cnp = ap->a_cnp; 903 struct direct *newdir; 904 int error; 905 struct ufsmount *ump = VFSTOUFS(dvp->v_mount); 906 struct ufs_lookup_results *ulr; 907 908 /* XXX should handle this material another way */ 909 ulr = &VTOI(dvp)->i_crap; 910 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 911 912 error = 0; 913 switch (ap->a_flags) { 914 case LOOKUP: 915 /* 4.4 format directories support whiteout operations */ 916 if (ump->um_maxsymlinklen > 0) 917 return (0); 918 return (EOPNOTSUPP); 919 920 case CREATE: 921 /* create a new directory whiteout */ 922 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 923 error = UFS_WAPBL_BEGIN(dvp->v_mount); 924 if (error) 925 break; 926 #ifdef DIAGNOSTIC 927 if (ump->um_maxsymlinklen <= 0) 928 panic("ufs_whiteout: old format filesystem"); 929 #endif 930 931 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 932 newdir->d_ino = WINO; 933 newdir->d_namlen = cnp->cn_namelen; 934 memcpy(newdir->d_name, cnp->cn_nameptr, 935 (size_t)cnp->cn_namelen); 936 newdir->d_name[cnp->cn_namelen] = '\0'; 937 newdir->d_type = DT_WHT; 938 error = ufs_direnter(dvp, ulr, NULL, newdir, cnp, NULL); 939 pool_cache_put(ufs_direct_cache, newdir); 940 break; 941 942 case DELETE: 943 /* remove an existing directory whiteout */ 944 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 945 error = UFS_WAPBL_BEGIN(dvp->v_mount); 946 if (error) 947 break; 948 #ifdef DIAGNOSTIC 949 if (ump->um_maxsymlinklen <= 0) 950 panic("ufs_whiteout: old format filesystem"); 951 #endif 952 953 cnp->cn_flags &= ~DOWHITEOUT; 954 error = ufs_dirremove(dvp, ulr, NULL, cnp->cn_flags, 0); 955 break; 956 default: 957 panic("ufs_whiteout: unknown op"); 958 /* NOTREACHED */ 959 } 960 UFS_WAPBL_END(dvp->v_mount); 961 fstrans_done(dvp->v_mount); 962 return (error); 963 } 964 965 966 /* 967 * Rename vnode operation 968 * rename("foo", "bar"); 969 * is essentially 970 * unlink("bar"); 971 * 
link("foo", "bar"); 972 * unlink("foo"); 973 * but ``atomically''. Can't do full commit without saving state in the 974 * inode on disk which isn't feasible at this time. Best we can do is 975 * always guarantee the target exists. 976 * 977 * Basic algorithm is: 978 * 979 * 1) Bump link count on source while we're linking it to the 980 * target. This also ensures the inode won't be deleted out 981 * from underneath us while we work (it may be truncated by 982 * a concurrent `trunc' or `open' for creation). 983 * 2) Link source to destination. If destination already exists, 984 * delete it first. 985 * 3) Unlink source reference to inode if still around. If a 986 * directory was moved and the parent of the destination 987 * is different from the source, patch the ".." entry in the 988 * directory. 989 */ 990 991 /* 992 * Notes on rename locking: 993 * 994 * We lock parent vnodes before child vnodes. This means in particular 995 * that if A is above B in the directory tree then A must be locked 996 * before B. (This is true regardless of how many steps appear in 997 * between, because an arbitrary number of other processes could lock 998 * parent/child in between and establish a lock cycle and deadlock.) 999 * 1000 * Therefore, if tdvp is above fdvp we must lock tdvp first; if fdvp 1001 * is above tdvp we must lock fdvp first; and if they're 1002 * incommensurate it doesn't matter. (But, we rely on the fact that 1003 * there's a whole-volume rename lock to prevent deadlock among groups 1004 * of renames upon overlapping sets of incommensurate vnodes.) 1005 * 1006 * In addition to establishing lock ordering the parent check also 1007 * serves to rule out cases where someone tries to move a directory 1008 * underneath itself, e.g. rename("a/b", "a/b/c"). If allowed to 1009 * proceed such renames would detach portions of the directory tree 1010 * and make fsck very unhappy. 
1011 * 1012 * Note that it is an error for *fvp* to be above tdvp; however, 1013 * *fdvp* can be above tdvp, as in rename("a/b", "a/c/d"). 1014 * 1015 * The parent check searches up the tree from tdvp until it either 1016 * finds fdvp or the root of the volume. It also returns the vnode it 1017 * saw immediately before fdvp, if any. Later on (after looking up 1018 * fvp) we will check to see if this *is* fvp and if so fail. 1019 * 1020 * If the parent check finds fdvp, it means fdvp is above tdvp, so we 1021 * lock fdvp first and then tdvp. Otherwise, either tdvp is above fdvp 1022 * or they're incommensurate and we lock tdvp first. 1023 * 1024 * In either case each of the child vnodes has to be looked up and 1025 * locked immediately after its parent. The cases 1026 * 1027 * fdvp/fvp/[.../]tdvp/tvp 1028 * tdvp/tvp/[.../]fdvp/fvp 1029 * 1030 * can cause deadlock otherwise. Note that both of these are error 1031 * cases; the first fails the parent check and the second fails 1032 * because tvp isn't empty. The parent check case is handled before 1033 * we start locking; however, the nonempty case requires locking tvp 1034 * to find out safely that it's nonempty. 1035 * 1036 * Therefore the procedure is either 1037 * 1038 * lock fdvp 1039 * lookup fvp 1040 * lock fvp 1041 * lock tdvp 1042 * lookup tvp 1043 * lock tvp 1044 * 1045 * or 1046 * 1047 * lock tdvp 1048 * lookup tvp 1049 * lock tvp 1050 * lock fdvp 1051 * lookup fvp 1052 * lock fvp 1053 * 1054 * This could in principle be simplified by always looking up fvp 1055 * last; because of the parent check we know by the time we start 1056 * locking that fvp cannot be directly above tdvp, so (given the 1057 * whole-volume rename lock and other assumptions) it's safe to lock 1058 * tdvp before fvp. 
This would allow the following scheme: 1059 * 1060 * lock fdvp 1061 * lock tdvp 1062 * or 1063 * lock tdvp 1064 * lock fdvp 1065 * 1066 * then 1067 * lookup tvp 1068 * lock tvp 1069 * lookup fvp 1070 * check if fvp is above tdvp, fail if so 1071 * lock fvp 1072 * 1073 * which is much, much simpler. 1074 * 1075 * However, current levels of vfs namei/lookup sanity do not permit 1076 * this. It is impossible currently to look up fvp without locking it. 1077 * (It gets locked regardless of whether LOCKLEAF is set; without 1078 * LOCKLEAF it just gets unlocked again, which doesn't help.) 1079 * 1080 * Therefore, because we must look up fvp to know if it's above tdvp, 1081 * which locks fvp, we must, at least in the case where fdvp is above 1082 * tdvp, do that before locking tdvp. The longer scheme does that; the 1083 * simpler scheme is not safe. 1084 * 1085 * Note that for now we aren't doing lookup() but relookup(); however, 1086 * the differences are minor. 1087 * 1088 * On top of all the above, just to make everything more 1089 * exciting, any two of the vnodes might end up being the same. 1090 * 1091 * FROMPARENT == FROMCHILD mv a/. foo is an error. 1092 * FROMPARENT == TOPARENT mv a/b a/c is ok. 1093 * FROMPARENT == TOCHILD mv a/b/c a/b will give ENOTEMPTY. 1094 * FROMCHILD == TOPARENT mv a/b a/b/c fails the parent check. 1095 * FROMCHILD == TOCHILD mv a/b a/b is ok. 1096 * TOPARENT == TOCHILD mv foo a/. is an error. 1097 * 1098 * This introduces more cases in the locking, because each distinct 1099 * vnode must be locked exactly once. 1100 * 1101 * When FROMPARENT == TOPARENT and FROMCHILD != TOCHILD we assume it 1102 * doesn't matter what order the children are locked in, because the 1103 * per-volume rename lock excludes other renames and no other 1104 * operation locks two files in the same directory at once. (Note: if 1105 * it turns out that link() does, link() is wrong.) 
1106 * 1107 * Until such time as we can do lookups without the namei and lookup 1108 * machinery "helpfully" locking the result vnode for us, we can't 1109 * avoid tripping on cases where FROMCHILD == TOCHILD. Currently for 1110 * non-directories we unlock the first one we lock while looking up 1111 * the second, then relock it if necessary. This is more or less 1112 * harmless since not much of interest can happen to the objects in 1113 * that window while we have the containing directory locked; but it's 1114 * not desirable and should be cleaned up when that becomes possible. 1115 * The right way to do it is to check after looking the second one up 1116 * and only lock it if it's different. (Note: for directories we don't 1117 * do this dance because the same directory can't appear more than 1118 * once.) 1119 */ 1120 1121 /* XXX following lifted from ufs_lookup.c; true when IMNT_DTYPE is not set in v_mount->mnt_iflag */ 1122 #define FSFMT(vp) (((vp)->v_mount->mnt_iflag & IMNT_DTYPE) == 0) 1123 1124 /* 1125 * Check if either entry referred to by FROM_ULR is within the range 1126 * of entries named by TO_ULR. Returns 1 if so and 0 otherwise. 1127 */ 1128 static int 1129 ulr_overlap(const struct ufs_lookup_results *from_ulr, 1130 const struct ufs_lookup_results *to_ulr) 1131 { 1132 doff_t from_start, from_prevstart; 1133 doff_t to_start, to_end; 1134 1135 /* 1136 * FROM is a DELETE result; offset points to the entry to 1137 * remove and subtracting count gives the previous entry. 1138 */ 1139 from_start = from_ulr->ulr_offset - from_ulr->ulr_count; 1140 from_prevstart = from_ulr->ulr_offset; /* NOTE(review): the two names look swapped relative to the comment above (ulr_offset is the entry, ulr_offset - ulr_count the previous one); the result is unaffected because both values get the same range test below. */ 1141 1142 /* 1143 * TO is a RENAME (thus non-DELETE) result; offset points 1144 * to the beginning of a region to write in, and adding 1145 * count gives the end of the region.
1146 */ 1147 to_start = to_ulr->ulr_offset; 1148 to_end = to_ulr->ulr_offset + to_ulr->ulr_count; 1149 /* Each candidate offset is tested against the half-open range [to_start, to_end). */ 1150 if (from_prevstart >= to_start && from_prevstart < to_end) { 1151 return 1; 1152 } 1153 if (from_start >= to_start && from_start < to_end) { 1154 return 1; 1155 } 1156 return 0; 1157 } 1158 1159 /* 1160 * Wrapper for relookup that also updates the supplemental results. If relookup fails its error is returned and *ulr is left alone; otherwise *ulr receives a copy of VTOI(dvp)->i_crap and 0 is returned. 1161 */ 1162 static int 1163 do_relookup(struct vnode *dvp, struct ufs_lookup_results *ulr, 1164 struct vnode **vp, struct componentname *cnp) 1165 { 1166 int error; 1167 1168 error = relookup(dvp, vp, cnp, 0); 1169 if (error) { 1170 return error; 1171 } 1172 /* update the supplemental results */ 1173 *ulr = VTOI(dvp)->i_crap; 1174 UFS_CHECK_CRAPCOUNTER(VTOI(dvp)); 1175 return 0; 1176 } 1177 1178 /* 1179 * Lock and relookup a sequence of two directories and two children. 1180 * Locks d1 and relooks up cn1 under it (result v1), then locks d2 and relooks up cn2 under it (result v2); d1 and d2 must differ. A directory whose i_size is 0 yields ENOENT. When the matching missing_ok flag is nonzero an ENOENT lookup result is turned into a NULL vnode and success; otherwise a NULL lookup result is turned into ENOENT. If v1 is d2, overlap_error is returned. On success returns 0 with the results stored in *v1_ret and *v2_ret; on failure every lock taken here has been dropped and any v1 reference released. 1181 */ 1182 static int 1183 lock_vnode_sequence(struct vnode *d1, struct ufs_lookup_results *ulr1, 1184 struct vnode **v1_ret, struct componentname *cn1, 1185 int v1_missing_ok, 1186 int overlap_error, 1187 struct vnode *d2, struct ufs_lookup_results *ulr2, 1188 struct vnode **v2_ret, struct componentname *cn2, 1189 int v2_missing_ok) 1190 { 1191 struct vnode *v1, *v2; 1192 int error; 1193 1194 KASSERT(d1 != d2); 1195 1196 vn_lock(d1, LK_EXCLUSIVE | LK_RETRY); 1197 if (VTOI(d1)->i_size == 0) { 1198 /* d1 has been rmdir'd */ 1199 VOP_UNLOCK(d1); 1200 return ENOENT; 1201 } 1202 error = do_relookup(d1, ulr1, &v1, cn1); 1203 if (v1_missing_ok) { 1204 if (error == ENOENT) { 1205 /* 1206 * Note: currently if the name doesn't exist, 1207 * relookup succeeds (it intercepts the 1208 * EJUSTRETURN from VOP_LOOKUP) and sets tvp 1209 * to NULL. Therefore, we will never get 1210 * ENOENT and this branch is not needed. 1211 * However, in a saner future the EJUSTRETURN 1212 * garbage will go away, so let's DTRT.
1213 */ 1214 v1 = NULL; 1215 error = 0; 1216 } 1217 } else { 1218 if (error == 0 && v1 == NULL) { 1219 /* This is what relookup sets if v1 disappeared. */ 1220 error = ENOENT; 1221 } 1222 } 1223 if (error) { 1224 VOP_UNLOCK(d1); 1225 return error; 1226 } /* The first child is the second directory: drop both locks and the v1 reference, then report the caller-supplied overlap_error. */ 1227 if (v1 && v1 == d2) { 1228 VOP_UNLOCK(d1); 1229 VOP_UNLOCK(v1); 1230 vrele(v1); 1231 return overlap_error; 1232 } 1233 1234 /* 1235 * The right way to do this is to do lookups without locking 1236 * the results, and lock the results afterwards; then at the 1237 * end we can avoid trying to lock v2 if v2 == v1. 1238 * 1239 * However, for the reasons described in the fdvp == tdvp case 1240 * in rename below, we can't do that safely. So, in the case 1241 * where v1 is not a directory, unlock it and lock it again 1242 * afterwards. This is safe in locking order because a 1243 * non-directory can't be above anything else in the tree. If 1244 * v1 *is* a directory, that's not true, but then because d1 1245 * != d2, v1 != v2. 1246 */ 1247 if (v1 && v1->v_type != VDIR) { 1248 VOP_UNLOCK(v1); 1249 } 1250 vn_lock(d2, LK_EXCLUSIVE | LK_RETRY); 1251 if (VTOI(d2)->i_size == 0) { 1252 /* d2 has been rmdir'd */ 1253 VOP_UNLOCK(d2); /* A non-directory v1 was already unlocked above, so only a directory v1 still needs unlocking here. */ 1254 if (v1 && v1->v_type == VDIR) { 1255 VOP_UNLOCK(v1); 1256 } 1257 VOP_UNLOCK(d1); 1258 if (v1) { 1259 vrele(v1); 1260 } 1261 return ENOENT; 1262 } 1263 error = do_relookup(d2, ulr2, &v2, cn2); 1264 if (v2_missing_ok) { 1265 if (error == ENOENT) { 1266 /* as above */ 1267 v2 = NULL; 1268 error = 0; 1269 } 1270 } else { 1271 if (error == 0 && v2 == NULL) { 1272 /* This is what relookup sets if v2 disappeared.
*/ 1273 error = ENOENT; 1274 } 1275 } 1276 if (error) { 1277 VOP_UNLOCK(d2); 1278 if (v1 && v1->v_type == VDIR) { 1279 VOP_UNLOCK(v1); 1280 } 1281 VOP_UNLOCK(d1); 1282 if (v1) { 1283 vrele(v1); 1284 } 1285 return error; 1286 } /* Retake the lock that was dropped on a non-directory v1 before d2 was locked, unless v1 and v2 are the same vnode. */ 1287 if (v1 && v1->v_type != VDIR && v1 != v2) { 1288 vn_lock(v1, LK_EXCLUSIVE | LK_RETRY); 1289 } 1290 *v1_ret = v1; 1291 *v2_ret = v2; 1292 return 0; 1293 } 1294 1295 /* 1296 * Rename vnode operation 1297 * rename("foo", "bar"); 1298 * is essentially 1299 * unlink("bar"); 1300 * link("foo", "bar"); 1301 * unlink("foo"); 1302 * but ``atomically''. Can't do full commit without saving state in the 1303 * inode on disk which isn't feasible at this time. Best we can do is 1304 * always guarantee the target exists. 1305 * 1306 * Basic algorithm is: 1307 * 1308 * 1) Bump link count on source while we're linking it to the 1309 * target. This also ensures the inode won't be deleted out 1310 * from underneath us while we work (it may be truncated by 1311 * a concurrent `trunc' or `open' for creation). 1312 * 2) Link source to destination. If destination already exists, 1313 * delete it first. 1314 * 3) Unlink source reference to inode if still around. If a 1315 * directory was moved and the parent of the destination 1316 * is different from the source, patch the ".." entry in the 1317 * directory.
1318 */ 1319 int 1320 ufs_rename(void *v) 1321 { 1322 struct vop_rename_args /* { 1323 struct vnode *a_fdvp; 1324 struct vnode *a_fvp; 1325 struct componentname *a_fcnp; 1326 struct vnode *a_tdvp; 1327 struct vnode *a_tvp; 1328 struct componentname *a_tcnp; 1329 } */ *ap = v; 1330 struct vnode *tvp, *tdvp, *fvp, *fdvp; 1331 struct componentname *tcnp, *fcnp; 1332 struct inode *ip, *txp, *fxp, *tdp, *fdp; 1333 struct mount *mp; 1334 struct direct *newdir; 1335 int doingdirectory, error; 1336 ino_t oldparent, newparent; 1337 1338 struct ufs_lookup_results from_ulr, to_ulr; /* private copies of the i_crap lookup results of fdvp and tdvp, refreshed by each do_relookup below */ 1339 1340 tvp = ap->a_tvp; 1341 tdvp = ap->a_tdvp; 1342 fvp = ap->a_fvp; 1343 fdvp = ap->a_fdvp; 1344 tcnp = ap->a_tcnp; 1345 fcnp = ap->a_fcnp; 1346 doingdirectory = error = 0; 1347 oldparent = newparent = 0; 1348 1349 /* save the supplemental lookup results as they currently exist */ 1350 from_ulr = VTOI(fdvp)->i_crap; 1351 to_ulr = VTOI(tdvp)->i_crap; 1352 UFS_CHECK_CRAPCOUNTER(VTOI(fdvp)); 1353 UFS_CHECK_CRAPCOUNTER(VTOI(tdvp)); 1354 1355 /* 1356 * Owing to VFS oddities we are currently called with tdvp/tvp 1357 * locked and not fdvp/fvp. In a sane world we'd be passed 1358 * tdvp and fdvp only, unlocked, and two name strings. Pretend 1359 * we have a sane world and unlock tdvp and tvp. 1360 */ 1361 VOP_UNLOCK(tdvp); 1362 if (tvp && tvp != tdvp) { 1363 VOP_UNLOCK(tvp); 1364 } 1365 1366 /* Also pretend we have a sane world and vrele fvp/tvp. */ 1367 vrele(fvp); 1368 fvp = NULL; 1369 if (tvp) { 1370 vrele(tvp); 1371 tvp = NULL; 1372 } 1373 1374 /* 1375 * Check for cross-device rename. 1376 */ 1377 if (fdvp->v_mount != tdvp->v_mount) { 1378 error = EXDEV; 1379 goto abort; 1380 } 1381 1382 /* 1383 * Reject "." and ".." 1384 */ 1385 if ((fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || 1386 (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 1387 (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.')) { 1388 error = EINVAL; 1389 goto abort; 1390 } 1391 1392 /* 1393 * Get locks.
1394 */ 1395 1396 /* paranoia */ 1397 fcnp->cn_flags |= LOCKPARENT|LOCKLEAF; 1398 tcnp->cn_flags |= LOCKPARENT|LOCKLEAF; 1399 1400 if (fdvp == tdvp) { 1401 /* One directory. Lock it and relookup both children. */ 1402 vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); 1403 1404 if (VTOI(fdvp)->i_size == 0) { 1405 /* directory has been rmdir'd */ 1406 VOP_UNLOCK(fdvp); 1407 error = ENOENT; 1408 goto abort; 1409 } 1410 1411 error = do_relookup(fdvp, &from_ulr, &fvp, fcnp); 1412 if (error == 0 && fvp == NULL) { 1413 /* relookup may produce this if fvp disappears */ 1414 error = ENOENT; 1415 } 1416 if (error) { 1417 VOP_UNLOCK(fdvp); 1418 goto abort; 1419 } 1420 1421 /* 1422 * The right way to do this is to look up both children 1423 * without locking either, and then lock both unless they 1424 * turn out to be the same. However, due to deep-seated 1425 * VFS-level issues all lookups lock the child regardless 1426 * of whether LOCKLEAF is set (if LOCKLEAF is not set, 1427 * the child is locked during lookup and then unlocked) 1428 * so it is not safe to look up tvp while fvp is locked. 1429 * 1430 * Unlocking fvp here temporarily is more or less safe, 1431 * because with the directory locked there's not much 1432 * that can happen to it. However, ideally it wouldn't 1433 * be necessary. XXX. 1434 */ 1435 VOP_UNLOCK(fvp); 1436 /* remember fdvp == tdvp so tdvp is locked */ 1437 error = do_relookup(tdvp, &to_ulr, &tvp, tcnp); 1438 if (error && error != ENOENT) { 1439 VOP_UNLOCK(fdvp); 1440 goto abort; 1441 } 1442 if (error == ENOENT) { 1443 /* 1444 * Note: currently if the name doesn't exist, 1445 * relookup succeeds (it intercepts the 1446 * EJUSTRETURN from VOP_LOOKUP) and sets tvp 1447 * to NULL. Therefore, we will never get 1448 * ENOENT and this branch is not needed. 1449 * However, in a saner future the EJUSTRETURN 1450 * garbage will go away, so let's DTRT.
1451 */ 1452 tvp = NULL; /* NOTE(review): error is left as ENOENT here, whereas lock_vnode_sequence resets it to 0 in the same situation; every later return path in this function assigns error again first, so this looks harmless -- TODO confirm. */ 1453 } 1454 1455 /* tvp is locked; lock fvp if necessary */ 1456 if (!tvp || tvp != fvp) { 1457 vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); 1458 } 1459 } else { 1460 int found_fdvp; 1461 struct vnode *illegal_fvp; 1462 1463 /* 1464 * The source must not be above the destination. (If 1465 * it were, the rename would detach a section of the 1466 * tree.) 1467 * 1468 * Look up the tree from tdvp to see if we find fdvp, 1469 * and if so, return the immediate child of fdvp we're 1470 * under; that must not turn out to be the same as 1471 * fvp. 1472 * 1473 * The per-volume rename lock guarantees that the 1474 * result of this check remains true until we finish 1475 * looking up and locking. 1476 */ 1477 error = ufs_parentcheck(fdvp, tdvp, fcnp->cn_cred, 1478 &found_fdvp, &illegal_fvp); 1479 if (error) { 1480 goto abort; 1481 } 1482 1483 /* Must lock in tree order. */ 1484 1485 if (found_fdvp) { 1486 /* fdvp -> fvp -> tdvp -> tvp */ 1487 error = lock_vnode_sequence(fdvp, &from_ulr, 1488 &fvp, fcnp, 0, 1489 EINVAL, 1490 tdvp, &to_ulr, 1491 &tvp, tcnp, 1); 1492 } else { 1493 /* tdvp -> tvp -> fdvp -> fvp */ 1494 error = lock_vnode_sequence(tdvp, &to_ulr, 1495 &tvp, tcnp, 1, 1496 ENOTEMPTY, 1497 fdvp, &from_ulr, 1498 &fvp, fcnp, 0); 1499 } 1500 if (error) { 1501 if (illegal_fvp) { 1502 vrele(illegal_fvp); 1503 } 1504 goto abort; 1505 } 1506 KASSERT(fvp != NULL); 1507 1508 if (illegal_fvp && fvp == illegal_fvp) { 1509 vrele(illegal_fvp); 1510 error = EINVAL; 1511 goto abort_withlocks; 1512 } 1513 1514 if (illegal_fvp) { 1515 vrele(illegal_fvp); 1516 } 1517 } 1518 1519 KASSERT(fdvp && VOP_ISLOCKED(fdvp)); 1520 KASSERT(fvp && VOP_ISLOCKED(fvp)); 1521 KASSERT(tdvp && VOP_ISLOCKED(tdvp)); 1522 KASSERT(tvp == NULL || VOP_ISLOCKED(tvp)); 1523 1524 /* --- everything is now locked --- */ 1525 1526 if (tvp && ((VTOI(tvp)->i_flags & (IMMUTABLE | APPEND)) || 1527 (VTOI(tdvp)->i_flags & APPEND))) { 1528 error = EPERM; 1529 goto abort_withlocks; 1530 } 1531 1532 /*
1533 * Check if just deleting a link name. 1534 */ 1535 if (fvp == tvp) { 1536 if (fvp->v_type == VDIR) { 1537 error = EINVAL; 1538 goto abort_withlocks; 1539 } 1540 1541 /* Release destination completely. Leave fdvp locked. */ 1542 VOP_ABORTOP(tdvp, tcnp); 1543 if (fdvp != tdvp) { 1544 VOP_UNLOCK(tdvp); 1545 } 1546 VOP_UNLOCK(tvp); 1547 vrele(tdvp); 1548 vrele(tvp); 1549 1550 /* Delete source. */ 1551 /* XXX: do we really need to relookup again? */ 1552 1553 /* 1554 * fdvp is still locked, but we just unlocked fvp 1555 * (because fvp == tvp) so just decref fvp 1556 */ 1557 vrele(fvp); 1558 fcnp->cn_flags &= ~(MODMASK); 1559 fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; 1560 fcnp->cn_nameiop = DELETE; 1561 if ((error = relookup(fdvp, &fvp, fcnp, 0))) { 1562 vput(fdvp); 1563 return (error); 1564 } 1565 return (VOP_REMOVE(fdvp, fvp, fcnp)); 1566 } 1567 fdp = VTOI(fdvp); 1568 ip = VTOI(fvp); 1569 if ((nlink_t) ip->i_nlink >= LINK_MAX) { 1570 error = EMLINK; 1571 goto abort_withlocks; 1572 } 1573 if ((ip->i_flags & (IMMUTABLE | APPEND)) || 1574 (fdp->i_flags & APPEND)) { 1575 error = EPERM; 1576 goto abort_withlocks; 1577 } 1578 if ((ip->i_mode & IFMT) == IFDIR) { 1579 /* 1580 * Avoid ".", "..", and aliases of "." for obvious reasons. 1581 */ 1582 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || 1583 fdp == ip || 1584 (fcnp->cn_flags & ISDOTDOT) || 1585 (tcnp->cn_flags & ISDOTDOT) || 1586 (ip->i_flag & IN_RENAME)) { 1587 error = EINVAL; 1588 goto abort_withlocks; 1589 } 1590 ip->i_flag |= IN_RENAME; 1591 doingdirectory = 1; 1592 } 1593 oldparent = fdp->i_number; 1594 VN_KNOTE(fdvp, NOTE_WRITE); /* XXXLUKEM/XXX: right place? */ 1595 1596 /* 1597 * Both the directory 1598 * and target vnodes are locked. 1599 */ 1600 tdp = VTOI(tdvp); 1601 txp = NULL; 1602 if (tvp) 1603 txp = VTOI(tvp); 1604 1605 mp = fdvp->v_mount; 1606 fstrans_start(mp, FSTRANS_SHARED); 1607 1608 if (oldparent != tdp->i_number) 1609 newparent = tdp->i_number; 1610 1611 /* 1612 * If ".."
must be changed (ie the directory gets a new 1613 * parent) the user must have write permission in the source 1614 * so as to be able to change "..". 1615 */ 1616 if (doingdirectory && newparent) { 1617 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred); 1618 if (error) 1619 goto out; 1620 } 1621 1622 KASSERT(fdvp != tvp); 1623 1624 if (newparent) { 1625 /* Check for the rename("foo/foo", "foo") case. */ /* NOTE(review): KASSERT(fdvp != tvp) just above already rules this condition out whenever assertions are enforced -- TODO confirm which of the two is intended. */ 1626 if (fdvp == tvp) { 1627 error = doingdirectory ? ENOTEMPTY : EISDIR; 1628 goto out; 1629 } 1630 } 1631 1632 fxp = VTOI(fvp); 1633 fdp = VTOI(fdvp); 1634 1635 error = UFS_WAPBL_BEGIN(fdvp->v_mount); 1636 if (error) 1637 goto out2; 1638 1639 /* 1640 * 1) Bump link count while we're moving stuff 1641 * around. If we crash somewhere before 1642 * completing our work, the link count 1643 * may be wrong, but correctable. 1644 */ 1645 ip->i_nlink++; 1646 DIP_ASSIGN(ip, nlink, ip->i_nlink); 1647 ip->i_flag |= IN_CHANGE; 1648 if ((error = UFS_UPDATE(fvp, NULL, NULL, UPDATE_DIROP)) != 0) { 1649 goto bad; 1650 } 1651 1652 /* 1653 * 2) If target doesn't exist, link the target 1654 * to the source and unlink the source. 1655 * Otherwise, rewrite the target directory 1656 * entry to reference the source inode and 1657 * expunge the original entry's existence. 1658 */ 1659 if (txp == NULL) { 1660 if (tdp->i_dev != ip->i_dev) 1661 panic("rename: EXDEV"); 1662 /* 1663 * Account for ".." in new directory. 1664 * When source and destination have the same 1665 * parent we don't fool with the link count.
1666 */ 1667 if (doingdirectory && newparent) { 1668 if ((nlink_t)tdp->i_nlink >= LINK_MAX) { 1669 error = EMLINK; 1670 goto bad; 1671 } 1672 tdp->i_nlink++; 1673 DIP_ASSIGN(tdp, nlink, tdp->i_nlink); 1674 tdp->i_flag |= IN_CHANGE; 1675 if ((error = UFS_UPDATE(tdvp, NULL, NULL, 1676 UPDATE_DIROP)) != 0) { 1677 tdp->i_nlink--; 1678 DIP_ASSIGN(tdp, nlink, tdp->i_nlink); 1679 tdp->i_flag |= IN_CHANGE; 1680 goto bad; 1681 } 1682 } 1683 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 1684 ufs_makedirentry(ip, tcnp, newdir); 1685 error = ufs_direnter(tdvp, &to_ulr, 1686 NULL, newdir, tcnp, NULL); 1687 pool_cache_put(ufs_direct_cache, newdir); 1688 if (error != 0) { 1689 if (doingdirectory && newparent) { 1690 tdp->i_nlink--; 1691 DIP_ASSIGN(tdp, nlink, tdp->i_nlink); 1692 tdp->i_flag |= IN_CHANGE; 1693 (void)UFS_UPDATE(tdvp, NULL, NULL, 1694 UPDATE_WAIT | UPDATE_DIROP); 1695 } 1696 goto bad; 1697 } 1698 VN_KNOTE(tdvp, NOTE_WRITE); 1699 } else { 1700 if (txp->i_dev != tdp->i_dev || txp->i_dev != ip->i_dev) 1701 panic("rename: EXDEV"); 1702 /* 1703 * Short circuit rename(foo, foo). 1704 */ 1705 if (txp->i_number == ip->i_number) 1706 panic("rename: same file"); 1707 /* 1708 * If the parent directory is "sticky", then the user must 1709 * own the parent directory, or the destination of the rename, 1710 * otherwise the destination may not be changed (except by 1711 * root). This implements append-only directories. 1712 */ 1713 if ((tdp->i_mode & S_ISTXT) && 1714 kauth_authorize_generic(tcnp->cn_cred, 1715 KAUTH_GENERIC_ISSUSER, NULL) != 0 && 1716 kauth_cred_geteuid(tcnp->cn_cred) != tdp->i_uid && 1717 txp->i_uid != kauth_cred_geteuid(tcnp->cn_cred)) { 1718 error = EPERM; 1719 goto bad; 1720 } 1721 /* 1722 * Target must be empty if a directory and have no links 1723 * to it. Also, ensure source and target are compatible 1724 * (both directories, or both not directories).
1725 */ 1726 if ((txp->i_mode & IFMT) == IFDIR) { 1727 if (txp->i_nlink > 2 || 1728 !ufs_dirempty(txp, tdp->i_number, tcnp->cn_cred)) { 1729 error = ENOTEMPTY; 1730 goto bad; 1731 } 1732 if (!doingdirectory) { 1733 error = ENOTDIR; 1734 goto bad; 1735 } 1736 cache_purge(tdvp); 1737 } else if (doingdirectory) { 1738 error = EISDIR; 1739 goto bad; 1740 } 1741 if ((error = ufs_dirrewrite(tdp, to_ulr.ulr_offset, 1742 txp, ip->i_number, 1743 IFTODT(ip->i_mode), doingdirectory && newparent ? 1744 newparent : doingdirectory, IN_CHANGE | IN_UPDATE)) != 0) 1745 goto bad; 1746 if (doingdirectory) { 1747 /* 1748 * Truncate inode. The only stuff left in the directory 1749 * is "." and "..". The "." reference is inconsequential 1750 * since we are quashing it. We have removed the "." 1751 * reference and the reference in the parent directory, 1752 * but there may be other hard links. 1753 */ 1754 if (!newparent) { 1755 tdp->i_nlink--; 1756 DIP_ASSIGN(tdp, nlink, tdp->i_nlink); 1757 tdp->i_flag |= IN_CHANGE; 1758 UFS_WAPBL_UPDATE(tdvp, NULL, NULL, 0); 1759 } 1760 txp->i_nlink--; 1761 DIP_ASSIGN(txp, nlink, txp->i_nlink); 1762 txp->i_flag |= IN_CHANGE; 1763 if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, 1764 tcnp->cn_cred))) 1765 goto bad; 1766 } 1767 VN_KNOTE(tdvp, NOTE_WRITE); 1768 VN_KNOTE(tvp, NOTE_DELETE); 1769 } 1770 1771 /* 1772 * Handle case where the directory entry we need to remove, 1773 * which is/was at from_ulr.ulr_offset, or the one before it, 1774 * which is/was at from_ulr.ulr_offset - from_ulr.ulr_count, 1775 * may have been moved when the directory insertion above 1776 * performed compaction.
1777 */ 1778 if (tdp->i_number == fdp->i_number && 1779 ulr_overlap(&from_ulr, &to_ulr)) { 1780 1781 struct buf *bp; 1782 struct direct *ep; 1783 struct ufsmount *ump = fdp->i_ump; 1784 doff_t curpos; 1785 doff_t endsearch; /* offset to end directory search */ 1786 uint32_t prev_reclen; 1787 int dirblksiz = ump->um_dirblksiz; 1788 const int needswap = UFS_MPNEEDSWAP(ump); 1789 u_long bmask; 1790 int namlen, entryoffsetinblock; 1791 char *dirbuf; 1792 1793 bmask = fdvp->v_mount->mnt_stat.f_iosize - 1; 1794 1795 /* 1796 * The fcnp entry will be somewhere between the start of 1797 * compaction (to_ulr.ulr_offset) and the original location 1798 * (from_ulr.ulr_offset). 1799 */ 1800 curpos = to_ulr.ulr_offset; 1801 endsearch = from_ulr.ulr_offset + from_ulr.ulr_reclen; 1802 entryoffsetinblock = 0; 1803 1804 /* 1805 * Get the directory block containing the start of 1806 * compaction. 1807 */ 1808 error = ufs_blkatoff(fdvp, (off_t)to_ulr.ulr_offset, &dirbuf, 1809 &bp, false); 1810 if (error) 1811 goto bad; 1812 1813 /* 1814 * Keep existing ulr_count (length of previous record) 1815 * for the case where compaction did not include the 1816 * previous entry but started at the from-entry. 1817 */ 1818 prev_reclen = from_ulr.ulr_count; 1819 1820 while (curpos < endsearch) { 1821 uint32_t reclen; 1822 1823 /* 1824 * If necessary, get the next directory block. 1825 * 1826 * dholland 7/13/11 to the best of my understanding 1827 * this should never happen; compaction occurs only 1828 * within single blocks. I think.
1829 */ 1830 if ((curpos & bmask) == 0) { 1831 if (bp != NULL) 1832 brelse(bp, 0); 1833 error = ufs_blkatoff(fdvp, (off_t)curpos, 1834 &dirbuf, &bp, false); 1835 if (error) 1836 goto bad; 1837 entryoffsetinblock = 0; 1838 } 1839 1840 KASSERT(bp != NULL); 1841 ep = (struct direct *)(dirbuf + entryoffsetinblock); 1842 reclen = ufs_rw16(ep->d_reclen, needswap); 1843 1844 #if (BYTE_ORDER == LITTLE_ENDIAN) 1845 if (FSFMT(fdvp) && needswap == 0) 1846 namlen = ep->d_type; 1847 else 1848 namlen = ep->d_namlen; 1849 #else 1850 if (FSFMT(fdvp) && needswap != 0) 1851 namlen = ep->d_type; 1852 else 1853 namlen = ep->d_namlen; 1854 #endif 1855 if ((ep->d_ino != 0) && 1856 (ufs_rw32(ep->d_ino, needswap) != WINO) && 1857 (namlen == fcnp->cn_namelen) && 1858 memcmp(ep->d_name, fcnp->cn_nameptr, namlen) == 0) { 1859 from_ulr.ulr_reclen = reclen; 1860 break; 1861 } 1862 curpos += reclen; 1863 entryoffsetinblock += reclen; 1864 prev_reclen = reclen; 1865 } 1866 1867 from_ulr.ulr_offset = curpos; 1868 from_ulr.ulr_count = prev_reclen; 1869 1870 KASSERT(curpos <= endsearch); 1871 1872 /* 1873 * If ulr_offset points to start of a directory block, 1874 * clear ulr_count so ufs_dirremove() doesn't try to 1875 * merge free space over a directory block boundary. 1876 */ 1877 if ((from_ulr.ulr_offset & (dirblksiz - 1)) == 0) 1878 from_ulr.ulr_count = 0; 1879 1880 brelse(bp, 0); 1881 } 1882 1883 /* 1884 * 3) Unlink the source. 1885 */ 1886 1887 #if 0 1888 /* 1889 * Ensure that the directory entry still exists and has not 1890 * changed while the new name has been entered. If the source is 1891 * a file then the entry may have been unlinked or renamed. In 1892 * either case there is no further work to be done. If the source 1893 * is a directory then it cannot have been rmdir'ed; The IRENAME 1894 * flag ensures that it cannot be moved by another rename or removed 1895 * by a rmdir.
1896 */ 1897 #endif 1898 KASSERT(fxp == ip); 1899 1900 /* 1901 * If the source is a directory with a new parent, the link 1902 * count of the old parent directory must be decremented and 1903 * ".." set to point to the new parent. 1904 */ 1905 if (doingdirectory && newparent) { 1906 KASSERT(fdp != NULL); 1907 ufs_dirrewrite(fxp, mastertemplate.dot_reclen, 1908 fdp, newparent, DT_DIR, 0, IN_CHANGE); /* NOTE(review): the return value of this ufs_dirrewrite call is discarded, unlike the checked call on the target entry earlier in this function -- TODO confirm that is intended. */ 1909 cache_purge(fdvp); 1910 } 1911 error = ufs_dirremove(fdvp, &from_ulr, 1912 fxp, fcnp->cn_flags, 0); 1913 fxp->i_flag &= ~IN_RENAME; 1914 1915 VN_KNOTE(fvp, NOTE_RENAME); 1916 goto done; 1917 1918 out: 1919 goto out2; 1920 1921 /* exit routines from steps 1 & 2 */ 1922 bad: 1923 if (doingdirectory) 1924 ip->i_flag &= ~IN_RENAME; 1925 ip->i_nlink--; 1926 DIP_ASSIGN(ip, nlink, ip->i_nlink); 1927 ip->i_flag |= IN_CHANGE; 1928 ip->i_flag &= ~IN_RENAME; /* NOTE(review): IN_RENAME is cleared conditionally just above, unconditionally here, and once more at out2; the repeats are redundant but harmless. */ 1929 UFS_WAPBL_UPDATE(fvp, NULL, NULL, 0); 1930 done: 1931 UFS_WAPBL_END(fdvp->v_mount); 1932 out2: 1933 /* 1934 * clear IN_RENAME - some exit paths happen too early to go 1935 * through the cleanup done in the "bad" case above, so we 1936 * always do this mini-cleanup here. 1937 */ 1938 ip->i_flag &= ~IN_RENAME; 1939 1940 VOP_UNLOCK(fdvp); 1941 if (tdvp != fdvp) { 1942 VOP_UNLOCK(tdvp); 1943 } 1944 VOP_UNLOCK(fvp); 1945 if (tvp && tvp != fvp) { 1946 VOP_UNLOCK(tvp); 1947 } 1948 1949 vrele(fdvp); 1950 vrele(tdvp); 1951 vrele(fvp); 1952 if (tvp) { 1953 vrele(tvp); 1954 } 1955 1956 fstrans_done(mp); 1957 return (error); 1958 1959 abort_withlocks: 1960 VOP_UNLOCK(fdvp); 1961 if (tdvp != fdvp) { 1962 VOP_UNLOCK(tdvp); 1963 } 1964 VOP_UNLOCK(fvp); 1965 if (tvp && tvp != fvp) { 1966 VOP_UNLOCK(tvp); 1967 } 1968 1969 abort: 1970 VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ 1971 VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS?
*/ 1972 vrele(tdvp); 1973 if (tvp) { 1974 vrele(tvp); 1975 } 1976 vrele(fdvp); 1977 if (fvp) { 1978 vrele(fvp); 1979 } 1980 return (error); 1981 } 1982 /* Mkdir vnode operation: allocates an inode with UFS_VALLOC, fills in a directory block from mastertemplate, and enters the new name in dvp with ufs_direnter. Every return path in this function releases dvp with vput. */ 1983 int 1984 ufs_mkdir(void *v) 1985 { 1986 struct vop_mkdir_args /* { 1987 struct vnode *a_dvp; 1988 struct vnode **a_vpp; 1989 struct componentname *a_cnp; 1990 struct vattr *a_vap; 1991 } */ *ap = v; 1992 struct vnode *dvp = ap->a_dvp, *tvp; 1993 struct vattr *vap = ap->a_vap; 1994 struct componentname *cnp = ap->a_cnp; 1995 struct inode *ip, *dp = VTOI(dvp); 1996 struct buf *bp; 1997 struct dirtemplate dirtemplate; 1998 struct direct *newdir; 1999 int error, dmode; 2000 struct ufsmount *ump = dp->i_ump; 2001 int dirblksiz = ump->um_dirblksiz; 2002 struct ufs_lookup_results *ulr; 2003 2004 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 2005 2006 /* XXX should handle this material another way */ 2007 ulr = &dp->i_crap; 2008 UFS_CHECK_CRAPCOUNTER(dp); 2009 2010 if ((nlink_t)dp->i_nlink >= LINK_MAX) { 2011 error = EMLINK; 2012 goto out; 2013 } 2014 dmode = vap->va_mode & ACCESSPERMS; 2015 dmode |= IFDIR; 2016 /* 2017 * Must simulate part of ufs_makeinode here to acquire the inode, 2018 * but not have it entered in the parent directory. The entry is 2019 * made later after writing "." and ".." entries.
2020 */ 2021 if ((error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, ap->a_vpp)) != 0) 2022 goto out; 2023 2024 tvp = *ap->a_vpp; 2025 ip = VTOI(tvp); 2026 2027 error = UFS_WAPBL_BEGIN(ap->a_dvp->v_mount); 2028 if (error) { /* UFS_WAPBL_BEGIN failed: free the inode just allocated and release its vnode before bailing out. */ 2029 UFS_VFREE(tvp, ip->i_number, dmode); 2030 vput(tvp); 2031 goto out; 2032 } 2033 ip->i_uid = kauth_cred_geteuid(cnp->cn_cred); 2034 DIP_ASSIGN(ip, uid, ip->i_uid); 2035 ip->i_gid = dp->i_gid; 2036 DIP_ASSIGN(ip, gid, ip->i_gid); 2037 #if defined(QUOTA) || defined(QUOTA2) 2038 if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) { /* Unlike the other failure exits, this path does all of its cleanup itself and returns directly instead of going through the out label. */ 2039 UFS_VFREE(tvp, ip->i_number, dmode); 2040 UFS_WAPBL_END(dvp->v_mount); 2041 fstrans_done(dvp->v_mount); 2042 vput(tvp); 2043 vput(dvp); 2044 return (error); 2045 } 2046 #endif 2047 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 2048 ip->i_mode = dmode; 2049 DIP_ASSIGN(ip, mode, dmode); 2050 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ 2051 ip->i_nlink = 2; 2052 DIP_ASSIGN(ip, nlink, 2); 2053 if (cnp->cn_flags & ISWHITEOUT) { 2054 ip->i_flags |= UF_OPAQUE; 2055 DIP_ASSIGN(ip, flags, ip->i_flags); 2056 } 2057 2058 /* 2059 * Bump link count in parent directory to reflect work done below. 2060 * Should be done before reference is created so cleanup is 2061 * possible if we crash. 2062 */ 2063 dp->i_nlink++; 2064 DIP_ASSIGN(dp, nlink, dp->i_nlink); 2065 dp->i_flag |= IN_CHANGE; 2066 if ((error = UFS_UPDATE(dvp, NULL, NULL, UPDATE_DIROP)) != 0) 2067 goto bad; 2068 2069 /* 2070 * Initialize directory with "." and ".." from static template.
2071 */ 2072 dirtemplate = mastertemplate; 2073 dirtemplate.dotdot_reclen = dirblksiz - dirtemplate.dot_reclen; 2074 dirtemplate.dot_ino = ufs_rw32(ip->i_number, UFS_MPNEEDSWAP(ump)); 2075 dirtemplate.dotdot_ino = ufs_rw32(dp->i_number, UFS_MPNEEDSWAP(ump)); 2076 dirtemplate.dot_reclen = ufs_rw16(dirtemplate.dot_reclen, 2077 UFS_MPNEEDSWAP(ump)); 2078 dirtemplate.dotdot_reclen = ufs_rw16(dirtemplate.dotdot_reclen, 2079 UFS_MPNEEDSWAP(ump)); /* NOTE(review): presumably um_maxsymlinklen <= 0 marks the older directory entry layout, where the namlen and type bytes trade places depending on byte order -- TODO confirm against ufs_lookup.c. */ 2080 if (ump->um_maxsymlinklen <= 0) { 2081 #if BYTE_ORDER == LITTLE_ENDIAN 2082 if (UFS_MPNEEDSWAP(ump) == 0) 2083 #else 2084 if (UFS_MPNEEDSWAP(ump) != 0) 2085 #endif 2086 { 2087 dirtemplate.dot_type = dirtemplate.dot_namlen; 2088 dirtemplate.dotdot_type = dirtemplate.dotdot_namlen; 2089 dirtemplate.dot_namlen = dirtemplate.dotdot_namlen = 0; 2090 } else 2091 dirtemplate.dot_type = dirtemplate.dotdot_type = 0; 2092 } 2093 if ((error = UFS_BALLOC(tvp, (off_t)0, dirblksiz, cnp->cn_cred, 2094 B_CLRBUF, &bp)) != 0) 2095 goto bad; 2096 ip->i_size = dirblksiz; 2097 DIP_ASSIGN(ip, size, dirblksiz); 2098 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 2099 uvm_vnp_setsize(tvp, ip->i_size); 2100 memcpy((void *)bp->b_data, (void *)&dirtemplate, sizeof dirtemplate); 2101 2102 /* 2103 * Directory set up, now install its entry in the parent directory. 2104 * We must write out the buffer containing the new directory body 2105 * before entering the new name in the parent.
2106 */ 2107 if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) 2108 goto bad; 2109 if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) { 2110 goto bad; 2111 } 2112 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 2113 ufs_makedirentry(ip, cnp, newdir); 2114 error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, bp); 2115 pool_cache_put(ufs_direct_cache, newdir); /* The bad label is reached on success as well as on failure: the error == 0 branch posts the knote and ends the transaction, the other branch undoes the link count changes and drops the new vnode. */ 2116 bad: 2117 if (error == 0) { 2118 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); 2119 UFS_WAPBL_END(dvp->v_mount); 2120 } else { 2121 dp->i_nlink--; 2122 DIP_ASSIGN(dp, nlink, dp->i_nlink); 2123 dp->i_flag |= IN_CHANGE; 2124 UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); 2125 /* 2126 * No need to do an explicit UFS_TRUNCATE here, vrele will 2127 * do this for us because we set the link count to 0. 2128 */ 2129 ip->i_nlink = 0; 2130 DIP_ASSIGN(ip, nlink, 0); 2131 ip->i_flag |= IN_CHANGE; 2132 /* If IN_ADIROP, account for it */ 2133 UFS_UNMARK_VNODE(tvp); 2134 UFS_WAPBL_UPDATE(tvp, NULL, NULL, UPDATE_DIROP); 2135 UFS_WAPBL_END(dvp->v_mount); 2136 vput(tvp); 2137 } 2138 out: 2139 fstrans_done(dvp->v_mount); 2140 vput(dvp); 2141 return (error); 2142 } 2143 /* Rmdir vnode operation: removes the entry for directory vp from its parent dvp. Fails with EINVAL if vp is dvp itself, is mounted on, or is being renamed; with ENOTEMPTY if its link count is not 2 or ufs_dirempty reports entries; and with EPERM if the APPEND or IMMUTABLE flag checks below hit. */ 2144 int 2145 ufs_rmdir(void *v) 2146 { 2147 struct vop_rmdir_args /* { 2148 struct vnode *a_dvp; 2149 struct vnode *a_vp; 2150 struct componentname *a_cnp; 2151 } */ *ap = v; 2152 struct vnode *vp, *dvp; 2153 struct componentname *cnp; 2154 struct inode *ip, *dp; 2155 int error; 2156 struct ufs_lookup_results *ulr; 2157 2158 vp = ap->a_vp; 2159 dvp = ap->a_dvp; 2160 cnp = ap->a_cnp; 2161 ip = VTOI(vp); 2162 dp = VTOI(dvp); 2163 2164 /* XXX should handle this material another way */ 2165 ulr = &dp->i_crap; 2166 UFS_CHECK_CRAPCOUNTER(dp); 2167 2168 /* 2169 * No rmdir "." or of mounted directories please.
2170 */ 2171 if (dp == ip || vp->v_mountedhere != NULL) { /* When dp == ip both arguments share one inode, so dvp gets vrele rather than vput; vp is vput in either case. */ 2172 if (dp == ip) 2173 vrele(dvp); 2174 else 2175 vput(dvp); 2176 vput(vp); 2177 return (EINVAL); 2178 } 2179 2180 fstrans_start(dvp->v_mount, FSTRANS_SHARED); 2181 2182 /* 2183 * Do not remove a directory that is in the process of being renamed. 2184 * Verify that the directory is empty (and valid). (Rmdir ".." won't 2185 * be valid since ".." will contain a reference to the current 2186 * directory and thus be non-empty.) 2187 */ 2188 error = 0; 2189 if (ip->i_flag & IN_RENAME) { 2190 error = EINVAL; 2191 goto out; 2192 } 2193 if (ip->i_nlink != 2 || 2194 !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { 2195 error = ENOTEMPTY; 2196 goto out; 2197 } 2198 if ((dp->i_flags & APPEND) || 2199 (ip->i_flags & (IMMUTABLE | APPEND))) { 2200 error = EPERM; 2201 goto out; 2202 } 2203 error = UFS_WAPBL_BEGIN(dvp->v_mount); 2204 if (error) 2205 goto out; 2206 /* 2207 * Delete reference to directory before purging 2208 * inode. If we crash in between, the directory 2209 * will be reattached to lost+found, 2210 */ 2211 error = ufs_dirremove(dvp, ulr, ip, cnp->cn_flags, 1); 2212 if (error) { 2213 UFS_WAPBL_END(dvp->v_mount); 2214 goto out; 2215 } 2216 VN_KNOTE(dvp, NOTE_WRITE | NOTE_LINK); 2217 cache_purge(dvp); 2218 /* 2219 * Truncate inode. The only stuff left in the directory is "." and 2220 * "..". The "." reference is inconsequential since we're quashing 2221 * it.
2222 */ 2223 dp->i_nlink--; 2224 DIP_ASSIGN(dp, nlink, dp->i_nlink); 2225 dp->i_flag |= IN_CHANGE; 2226 UFS_WAPBL_UPDATE(dvp, NULL, NULL, UPDATE_DIROP); 2227 ip->i_nlink--; 2228 DIP_ASSIGN(ip, nlink, ip->i_nlink); 2229 ip->i_flag |= IN_CHANGE; 2230 error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred); 2231 cache_purge(vp); 2232 /* 2233 * Unlock the log while we still have reference to unlinked 2234 * directory vp so that it will not get locked for recycling 2235 */ 2236 UFS_WAPBL_END(dvp->v_mount); 2237 #ifdef UFS_DIRHASH 2238 if (ip->i_dirhash != NULL) 2239 ufsdirhash_free(ip); 2240 #endif 2241 out: /* NOTE(review): this label is also the target of the error gotos above, so NOTE_DELETE is posted on vp even when the rmdir failed -- TODO confirm that is intended. */ 2242 VN_KNOTE(vp, NOTE_DELETE); 2243 vput(vp); 2244 fstrans_done(dvp->v_mount); 2245 vput(dvp); 2246 return (error); 2247 } 2248 2249 /* 2250 * symlink -- make a symbolic link. A target shorter than um_maxsymlinklen is copied into SHORTLINK(ip); anything longer is written through vn_rdwr. 2251 */ 2252 int 2253 ufs_symlink(void *v) 2254 { 2255 struct vop_symlink_args /* { 2256 struct vnode *a_dvp; 2257 struct vnode **a_vpp; 2258 struct componentname *a_cnp; 2259 struct vattr *a_vap; 2260 char *a_target; 2261 } */ *ap = v; 2262 struct vnode *vp, **vpp; 2263 struct inode *ip; 2264 int len, error; 2265 struct ufs_lookup_results *ulr; 2266 2267 vpp = ap->a_vpp; 2268 2269 /* XXX should handle this material another way */ 2270 ulr = &VTOI(ap->a_dvp)->i_crap; 2271 UFS_CHECK_CRAPCOUNTER(VTOI(ap->a_dvp)); 2272 2273 /* 2274 * UFS_WAPBL_BEGIN1(dvp->v_mount, dvp) performed by successful 2275 * ufs_makeinode 2276 */ 2277 fstrans_start(ap->a_dvp->v_mount, FSTRANS_SHARED); 2278 error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, ulr, 2279 vpp, ap->a_cnp); 2280 if (error) 2281 goto out; 2282 VN_KNOTE(ap->a_dvp, NOTE_WRITE); 2283 vp = *vpp; 2284 len = strlen(ap->a_target); 2285 ip = VTOI(vp); 2286 if (len < ip->i_ump->um_maxsymlinklen) { 2287 memcpy((char *)SHORTLINK(ip), ap->a_target, len); 2288 ip->i_size = len; 2289 DIP_ASSIGN(ip, size, len); 2290 uvm_vnp_setsize(vp, ip->i_size); 2291 ip->i_flag |= IN_CHANGE | IN_UPDATE; 2292 if (vp->v_mount->mnt_flag & MNT_RELATIME) 2293
ip->i_flag |= IN_ACCESS; 2294 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 2295 } else 2296 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, 2297 UIO_SYSSPACE, IO_NODELOCKED | IO_JOURNALLOCKED, 2298 ap->a_cnp->cn_cred, NULL, NULL); 2299 UFS_WAPBL_END1(ap->a_dvp->v_mount, ap->a_dvp); 2300 if (error) 2301 vput(vp); 2302 out: 2303 fstrans_done(ap->a_dvp->v_mount); 2304 return (error); 2305 } 2306 2307 /* 2308 * Vnode op for reading directories. 2309 * 2310 * This routine handles converting from the on-disk directory format 2311 * "struct direct" to the in-memory format "struct dirent" as well as 2312 * byte swapping the entries if necessary. 2313 */ 2314 int 2315 ufs_readdir(void *v) 2316 { 2317 struct vop_readdir_args /* { 2318 struct vnode *a_vp; 2319 struct uio *a_uio; 2320 kauth_cred_t a_cred; 2321 int *a_eofflag; 2322 off_t **a_cookies; 2323 int *ncookies; 2324 } */ *ap = v; 2325 struct vnode *vp = ap->a_vp; 2326 struct direct *cdp, *ecdp; 2327 struct dirent *ndp; 2328 char *cdbuf, *ndbuf, *endp; 2329 struct uio auio, *uio; 2330 struct iovec aiov; 2331 int error; 2332 size_t count, ccount, rcount; 2333 off_t off, *ccp; 2334 off_t startoff; 2335 size_t skipbytes; 2336 struct ufsmount *ump = VFSTOUFS(vp->v_mount); 2337 int nswap = UFS_MPNEEDSWAP(ump); 2338 #if BYTE_ORDER == LITTLE_ENDIAN 2339 int needswap = ump->um_maxsymlinklen <= 0 && nswap == 0; 2340 #else 2341 int needswap = ump->um_maxsymlinklen <= 0 && nswap != 0; 2342 #endif 2343 uio = ap->a_uio; 2344 count = uio->uio_resid; 2345 rcount = count - ((uio->uio_offset + count) & (ump->um_dirblksiz - 1)); 2346 2347 if (rcount < _DIRENT_MINSIZE(cdp) || count < _DIRENT_MINSIZE(ndp)) 2348 return EINVAL; 2349 2350 startoff = uio->uio_offset & ~(ump->um_dirblksiz - 1); 2351 skipbytes = uio->uio_offset - startoff; 2352 rcount += skipbytes; 2353 2354 auio.uio_iov = &aiov; 2355 auio.uio_iovcnt = 1; 2356 auio.uio_offset = startoff; 2357 auio.uio_resid = rcount; 2358 UIO_SETUP_SYSSPACE(&auio); 2359 auio.uio_rw = 
UIO_READ; 2360 cdbuf = malloc(rcount, M_TEMP, M_WAITOK); 2361 aiov.iov_base = cdbuf; 2362 aiov.iov_len = rcount; 2363 error = VOP_READ(vp, &auio, 0, ap->a_cred); 2364 if (error != 0) { 2365 free(cdbuf, M_TEMP); 2366 return error; 2367 } 2368 2369 rcount -= auio.uio_resid; 2370 2371 cdp = (struct direct *)(void *)cdbuf; 2372 ecdp = (struct direct *)(void *)&cdbuf[rcount]; 2373 2374 ndbuf = malloc(count, M_TEMP, M_WAITOK); 2375 ndp = (struct dirent *)(void *)ndbuf; 2376 endp = &ndbuf[count]; 2377 2378 off = uio->uio_offset; 2379 if (ap->a_cookies) { 2380 ccount = rcount / _DIRENT_RECLEN(cdp, 1); 2381 ccp = *(ap->a_cookies) = malloc(ccount * sizeof(*ccp), 2382 M_TEMP, M_WAITOK); 2383 } else { 2384 /* XXX: GCC */ 2385 ccount = 0; 2386 ccp = NULL; 2387 } 2388 2389 while (cdp < ecdp) { 2390 cdp->d_reclen = ufs_rw16(cdp->d_reclen, nswap); 2391 if (skipbytes > 0) { 2392 if (cdp->d_reclen <= skipbytes) { 2393 skipbytes -= cdp->d_reclen; 2394 cdp = _DIRENT_NEXT(cdp); 2395 continue; 2396 } 2397 /* 2398 * invalid cookie. 
2399 */ 2400 error = EINVAL; 2401 goto out; 2402 } 2403 if (cdp->d_reclen == 0) { 2404 struct dirent *ondp = ndp; 2405 ndp->d_reclen = _DIRENT_MINSIZE(ndp); 2406 ndp = _DIRENT_NEXT(ndp); 2407 ondp->d_reclen = 0; 2408 cdp = ecdp; 2409 break; 2410 } 2411 if (needswap) { 2412 ndp->d_type = cdp->d_namlen; 2413 ndp->d_namlen = cdp->d_type; 2414 } else { 2415 ndp->d_type = cdp->d_type; 2416 ndp->d_namlen = cdp->d_namlen; 2417 } 2418 ndp->d_reclen = _DIRENT_RECLEN(ndp, ndp->d_namlen); 2419 if ((char *)(void *)ndp + ndp->d_reclen + 2420 _DIRENT_MINSIZE(ndp) > endp) 2421 break; 2422 ndp->d_fileno = ufs_rw32(cdp->d_ino, nswap); 2423 (void)memcpy(ndp->d_name, cdp->d_name, ndp->d_namlen); 2424 memset(&ndp->d_name[ndp->d_namlen], 0, 2425 ndp->d_reclen - _DIRENT_NAMEOFF(ndp) - ndp->d_namlen); 2426 off += cdp->d_reclen; 2427 if (ap->a_cookies) { 2428 KASSERT(ccp - *(ap->a_cookies) < ccount); 2429 *(ccp++) = off; 2430 } 2431 ndp = _DIRENT_NEXT(ndp); 2432 cdp = _DIRENT_NEXT(cdp); 2433 } 2434 2435 count = ((char *)(void *)ndp - ndbuf); 2436 error = uiomove(ndbuf, count, uio); 2437 out: 2438 if (ap->a_cookies) { 2439 if (error) { 2440 free(*(ap->a_cookies), M_TEMP); 2441 *(ap->a_cookies) = NULL; 2442 *(ap->a_ncookies) = 0; 2443 } else { 2444 *ap->a_ncookies = ccp - *(ap->a_cookies); 2445 } 2446 } 2447 uio->uio_offset = off; 2448 free(ndbuf, M_TEMP); 2449 free(cdbuf, M_TEMP); 2450 *ap->a_eofflag = VTOI(vp)->i_size <= uio->uio_offset; 2451 return error; 2452 } 2453 2454 /* 2455 * Return target name of a symbolic link 2456 */ 2457 int 2458 ufs_readlink(void *v) 2459 { 2460 struct vop_readlink_args /* { 2461 struct vnode *a_vp; 2462 struct uio *a_uio; 2463 kauth_cred_t a_cred; 2464 } */ *ap = v; 2465 struct vnode *vp = ap->a_vp; 2466 struct inode *ip = VTOI(vp); 2467 struct ufsmount *ump = VFSTOUFS(vp->v_mount); 2468 int isize; 2469 2470 isize = ip->i_size; 2471 if (isize < ump->um_maxsymlinklen || 2472 (ump->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) { 2473 uiomove((char 
*)SHORTLINK(ip), isize, ap->a_uio); 2474 return (0); 2475 } 2476 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); 2477 } 2478 2479 /* 2480 * Calculate the logical to physical mapping if not done already, 2481 * then call the device strategy routine. 2482 */ 2483 int 2484 ufs_strategy(void *v) 2485 { 2486 struct vop_strategy_args /* { 2487 struct vnode *a_vp; 2488 struct buf *a_bp; 2489 } */ *ap = v; 2490 struct buf *bp; 2491 struct vnode *vp; 2492 struct inode *ip; 2493 struct mount *mp; 2494 int error; 2495 2496 bp = ap->a_bp; 2497 vp = ap->a_vp; 2498 ip = VTOI(vp); 2499 if (vp->v_type == VBLK || vp->v_type == VCHR) 2500 panic("ufs_strategy: spec"); 2501 KASSERT(bp->b_bcount != 0); 2502 if (bp->b_blkno == bp->b_lblkno) { 2503 error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, 2504 NULL); 2505 if (error) { 2506 bp->b_error = error; 2507 biodone(bp); 2508 return (error); 2509 } 2510 if (bp->b_blkno == -1) /* no valid data */ 2511 clrbuf(bp); 2512 } 2513 if (bp->b_blkno < 0) { /* block is not on disk */ 2514 biodone(bp); 2515 return (0); 2516 } 2517 vp = ip->i_devvp; 2518 2519 error = VOP_STRATEGY(vp, bp); 2520 if (error) 2521 return error; 2522 2523 if (!BUF_ISREAD(bp)) 2524 return 0; 2525 2526 mp = wapbl_vptomp(vp); 2527 if (mp == NULL || mp->mnt_wapbl_replay == NULL || 2528 !WAPBL_REPLAY_ISOPEN(mp) || 2529 !WAPBL_REPLAY_CAN_READ(mp, bp->b_blkno, bp->b_bcount)) 2530 return 0; 2531 2532 error = biowait(bp); 2533 if (error) 2534 return error; 2535 2536 error = WAPBL_REPLAY_READ(mp, bp->b_data, bp->b_blkno, bp->b_bcount); 2537 if (error) { 2538 mutex_enter(&bufcache_lock); 2539 SET(bp->b_cflags, BC_INVAL); 2540 mutex_exit(&bufcache_lock); 2541 } 2542 return error; 2543 } 2544 2545 /* 2546 * Print out the contents of an inode. 
2547 */ 2548 int 2549 ufs_print(void *v) 2550 { 2551 struct vop_print_args /* { 2552 struct vnode *a_vp; 2553 } */ *ap = v; 2554 struct vnode *vp; 2555 struct inode *ip; 2556 2557 vp = ap->a_vp; 2558 ip = VTOI(vp); 2559 printf("tag VT_UFS, ino %llu, on dev %llu, %llu", 2560 (unsigned long long)ip->i_number, 2561 (unsigned long long)major(ip->i_dev), 2562 (unsigned long long)minor(ip->i_dev)); 2563 printf(" flags 0x%x, nlink %d\n", 2564 ip->i_flag, ip->i_nlink); 2565 printf("\tmode 0%o, owner %d, group %d, size %qd", 2566 ip->i_mode, ip->i_uid, ip->i_gid, 2567 (long long)ip->i_size); 2568 if (vp->v_type == VFIFO) 2569 VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v); 2570 printf("\n"); 2571 return (0); 2572 } 2573 2574 /* 2575 * Read wrapper for special devices. 2576 */ 2577 int 2578 ufsspec_read(void *v) 2579 { 2580 struct vop_read_args /* { 2581 struct vnode *a_vp; 2582 struct uio *a_uio; 2583 int a_ioflag; 2584 kauth_cred_t a_cred; 2585 } */ *ap = v; 2586 2587 /* 2588 * Set access flag. 2589 */ 2590 if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0) 2591 VTOI(ap->a_vp)->i_flag |= IN_ACCESS; 2592 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap)); 2593 } 2594 2595 /* 2596 * Write wrapper for special devices. 2597 */ 2598 int 2599 ufsspec_write(void *v) 2600 { 2601 struct vop_write_args /* { 2602 struct vnode *a_vp; 2603 struct uio *a_uio; 2604 int a_ioflag; 2605 kauth_cred_t a_cred; 2606 } */ *ap = v; 2607 2608 /* 2609 * Set update and change flags. 2610 */ 2611 if ((ap->a_vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0) 2612 VTOI(ap->a_vp)->i_flag |= IN_MODIFY; 2613 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap)); 2614 } 2615 2616 /* 2617 * Close wrapper for special devices. 2618 * 2619 * Update the times on the inode then do device close. 
2620 */ 2621 int 2622 ufsspec_close(void *v) 2623 { 2624 struct vop_close_args /* { 2625 struct vnode *a_vp; 2626 int a_fflag; 2627 kauth_cred_t a_cred; 2628 } */ *ap = v; 2629 struct vnode *vp; 2630 struct inode *ip; 2631 2632 vp = ap->a_vp; 2633 ip = VTOI(vp); 2634 if (vp->v_usecount > 1) 2635 UFS_ITIMES(vp, NULL, NULL, NULL); 2636 return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); 2637 } 2638 2639 /* 2640 * Read wrapper for fifo's 2641 */ 2642 int 2643 ufsfifo_read(void *v) 2644 { 2645 struct vop_read_args /* { 2646 struct vnode *a_vp; 2647 struct uio *a_uio; 2648 int a_ioflag; 2649 kauth_cred_t a_cred; 2650 } */ *ap = v; 2651 2652 /* 2653 * Set access flag. 2654 */ 2655 VTOI(ap->a_vp)->i_flag |= IN_ACCESS; 2656 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap)); 2657 } 2658 2659 /* 2660 * Write wrapper for fifo's. 2661 */ 2662 int 2663 ufsfifo_write(void *v) 2664 { 2665 struct vop_write_args /* { 2666 struct vnode *a_vp; 2667 struct uio *a_uio; 2668 int a_ioflag; 2669 kauth_cred_t a_cred; 2670 } */ *ap = v; 2671 2672 /* 2673 * Set update and change flags. 2674 */ 2675 VTOI(ap->a_vp)->i_flag |= IN_MODIFY; 2676 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap)); 2677 } 2678 2679 /* 2680 * Close wrapper for fifo's. 2681 * 2682 * Update the times on the inode then do device close. 2683 */ 2684 int 2685 ufsfifo_close(void *v) 2686 { 2687 struct vop_close_args /* { 2688 struct vnode *a_vp; 2689 int a_fflag; 2690 kauth_cred_t a_cred; 2691 } */ *ap = v; 2692 struct vnode *vp; 2693 struct inode *ip; 2694 2695 vp = ap->a_vp; 2696 ip = VTOI(vp); 2697 if (ap->a_vp->v_usecount > 1) 2698 UFS_ITIMES(vp, NULL, NULL, NULL); 2699 return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); 2700 } 2701 2702 /* 2703 * Return POSIX pathconf information applicable to ufs filesystems. 
2704 */ 2705 int 2706 ufs_pathconf(void *v) 2707 { 2708 struct vop_pathconf_args /* { 2709 struct vnode *a_vp; 2710 int a_name; 2711 register_t *a_retval; 2712 } */ *ap = v; 2713 2714 switch (ap->a_name) { 2715 case _PC_LINK_MAX: 2716 *ap->a_retval = LINK_MAX; 2717 return (0); 2718 case _PC_NAME_MAX: 2719 *ap->a_retval = FFS_MAXNAMLEN; 2720 return (0); 2721 case _PC_PATH_MAX: 2722 *ap->a_retval = PATH_MAX; 2723 return (0); 2724 case _PC_PIPE_BUF: 2725 *ap->a_retval = PIPE_BUF; 2726 return (0); 2727 case _PC_CHOWN_RESTRICTED: 2728 *ap->a_retval = 1; 2729 return (0); 2730 case _PC_NO_TRUNC: 2731 *ap->a_retval = 1; 2732 return (0); 2733 case _PC_SYNC_IO: 2734 *ap->a_retval = 1; 2735 return (0); 2736 case _PC_FILESIZEBITS: 2737 *ap->a_retval = 42; 2738 return (0); 2739 case _PC_SYMLINK_MAX: 2740 *ap->a_retval = MAXPATHLEN; 2741 return (0); 2742 case _PC_2_SYMLINKS: 2743 *ap->a_retval = 1; 2744 return (0); 2745 default: 2746 return (EINVAL); 2747 } 2748 /* NOTREACHED */ 2749 } 2750 2751 /* 2752 * Advisory record locking support 2753 */ 2754 int 2755 ufs_advlock(void *v) 2756 { 2757 struct vop_advlock_args /* { 2758 struct vnode *a_vp; 2759 void * a_id; 2760 int a_op; 2761 struct flock *a_fl; 2762 int a_flags; 2763 } */ *ap = v; 2764 struct inode *ip; 2765 2766 ip = VTOI(ap->a_vp); 2767 return lf_advlock(ap, &ip->i_lockf, ip->i_size); 2768 } 2769 2770 /* 2771 * Initialize the vnode associated with a new inode, handle aliased 2772 * vnodes. 
2773 */ 2774 void 2775 ufs_vinit(struct mount *mntp, int (**specops)(void *), int (**fifoops)(void *), 2776 struct vnode **vpp) 2777 { 2778 struct timeval tv; 2779 struct inode *ip; 2780 struct vnode *vp; 2781 dev_t rdev; 2782 struct ufsmount *ump; 2783 2784 vp = *vpp; 2785 ip = VTOI(vp); 2786 switch(vp->v_type = IFTOVT(ip->i_mode)) { 2787 case VCHR: 2788 case VBLK: 2789 vp->v_op = specops; 2790 ump = ip->i_ump; 2791 if (ump->um_fstype == UFS1) 2792 rdev = (dev_t)ufs_rw32(ip->i_ffs1_rdev, 2793 UFS_MPNEEDSWAP(ump)); 2794 else 2795 rdev = (dev_t)ufs_rw64(ip->i_ffs2_rdev, 2796 UFS_MPNEEDSWAP(ump)); 2797 spec_node_init(vp, rdev); 2798 break; 2799 case VFIFO: 2800 vp->v_op = fifoops; 2801 break; 2802 case VNON: 2803 case VBAD: 2804 case VSOCK: 2805 case VLNK: 2806 case VDIR: 2807 case VREG: 2808 break; 2809 } 2810 if (ip->i_number == ROOTINO) 2811 vp->v_vflag |= VV_ROOT; 2812 /* 2813 * Initialize modrev times 2814 */ 2815 getmicrouptime(&tv); 2816 ip->i_modrev = (uint64_t)(uint)tv.tv_sec << 32 2817 | tv.tv_usec * 4294u; 2818 *vpp = vp; 2819 } 2820 2821 /* 2822 * Allocate a new inode. 2823 */ 2824 int 2825 ufs_makeinode(int mode, struct vnode *dvp, const struct ufs_lookup_results *ulr, 2826 struct vnode **vpp, struct componentname *cnp) 2827 { 2828 struct inode *ip, *pdir; 2829 struct direct *newdir; 2830 struct vnode *tvp; 2831 int error, ismember = 0; 2832 2833 UFS_WAPBL_JUNLOCK_ASSERT(dvp->v_mount); 2834 2835 pdir = VTOI(dvp); 2836 2837 if ((mode & IFMT) == 0) 2838 mode |= IFREG; 2839 2840 if ((error = UFS_VALLOC(dvp, mode, cnp->cn_cred, vpp)) != 0) { 2841 vput(dvp); 2842 return (error); 2843 } 2844 tvp = *vpp; 2845 ip = VTOI(tvp); 2846 ip->i_gid = pdir->i_gid; 2847 DIP_ASSIGN(ip, gid, ip->i_gid); 2848 ip->i_uid = kauth_cred_geteuid(cnp->cn_cred); 2849 DIP_ASSIGN(ip, uid, ip->i_uid); 2850 error = UFS_WAPBL_BEGIN1(dvp->v_mount, dvp); 2851 if (error) { 2852 /* 2853 * Note, we can't VOP_VFREE(tvp) here like we should 2854 * because we can't write to the disk. 
Instead, we leave 2855 * the vnode dangling from the journal. 2856 */ 2857 vput(tvp); 2858 vput(dvp); 2859 return (error); 2860 } 2861 #if defined(QUOTA) || defined(QUOTA2) 2862 if ((error = chkiq(ip, 1, cnp->cn_cred, 0))) { 2863 UFS_VFREE(tvp, ip->i_number, mode); 2864 UFS_WAPBL_END1(dvp->v_mount, dvp); 2865 vput(tvp); 2866 vput(dvp); 2867 return (error); 2868 } 2869 #endif 2870 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; 2871 ip->i_mode = mode; 2872 DIP_ASSIGN(ip, mode, mode); 2873 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ 2874 ip->i_nlink = 1; 2875 DIP_ASSIGN(ip, nlink, 1); 2876 if ((ip->i_mode & ISGID) && (kauth_cred_ismember_gid(cnp->cn_cred, 2877 ip->i_gid, &ismember) != 0 || !ismember) && 2878 kauth_authorize_generic(cnp->cn_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 2879 ip->i_mode &= ~ISGID; 2880 DIP_ASSIGN(ip, mode, ip->i_mode); 2881 } 2882 2883 if (cnp->cn_flags & ISWHITEOUT) { 2884 ip->i_flags |= UF_OPAQUE; 2885 DIP_ASSIGN(ip, flags, ip->i_flags); 2886 } 2887 2888 /* 2889 * Make sure inode goes to disk before directory entry. 2890 */ 2891 if ((error = UFS_UPDATE(tvp, NULL, NULL, UPDATE_DIROP)) != 0) 2892 goto bad; 2893 newdir = pool_cache_get(ufs_direct_cache, PR_WAITOK); 2894 ufs_makedirentry(ip, cnp, newdir); 2895 error = ufs_direnter(dvp, ulr, tvp, newdir, cnp, NULL); 2896 pool_cache_put(ufs_direct_cache, newdir); 2897 if (error) 2898 goto bad; 2899 vput(dvp); 2900 *vpp = tvp; 2901 return (0); 2902 2903 bad: 2904 /* 2905 * Write error occurred trying to update the inode 2906 * or the directory so must deallocate the inode. 2907 */ 2908 ip->i_nlink = 0; 2909 DIP_ASSIGN(ip, nlink, 0); 2910 ip->i_flag |= IN_CHANGE; 2911 /* If IN_ADIROP, account for it */ 2912 UFS_UNMARK_VNODE(tvp); 2913 UFS_WAPBL_UPDATE(tvp, NULL, NULL, 0); 2914 tvp->v_type = VNON; /* explodes later if VBLK */ 2915 UFS_WAPBL_END1(dvp->v_mount, dvp); 2916 vput(tvp); 2917 vput(dvp); 2918 return (error); 2919 } 2920 2921 /* 2922 * Allocate len bytes at offset off. 
2923 */ 2924 int 2925 ufs_gop_alloc(struct vnode *vp, off_t off, off_t len, int flags, 2926 kauth_cred_t cred) 2927 { 2928 struct inode *ip = VTOI(vp); 2929 int error, delta, bshift, bsize; 2930 UVMHIST_FUNC("ufs_gop_alloc"); UVMHIST_CALLED(ubchist); 2931 2932 error = 0; 2933 bshift = vp->v_mount->mnt_fs_bshift; 2934 bsize = 1 << bshift; 2935 2936 delta = off & (bsize - 1); 2937 off -= delta; 2938 len += delta; 2939 2940 while (len > 0) { 2941 bsize = MIN(bsize, len); 2942 2943 error = UFS_BALLOC(vp, off, bsize, cred, flags, NULL); 2944 if (error) { 2945 goto out; 2946 } 2947 2948 /* 2949 * increase file size now, UFS_BALLOC() requires that 2950 * EOF be up-to-date before each call. 2951 */ 2952 2953 if (ip->i_size < off + bsize) { 2954 UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x", 2955 vp, ip->i_size, off + bsize, 0); 2956 ip->i_size = off + bsize; 2957 DIP_ASSIGN(ip, size, ip->i_size); 2958 } 2959 2960 off += bsize; 2961 len -= bsize; 2962 } 2963 2964 out: 2965 UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); 2966 return error; 2967 } 2968 2969 void 2970 ufs_gop_markupdate(struct vnode *vp, int flags) 2971 { 2972 u_int32_t mask = 0; 2973 2974 if ((flags & GOP_UPDATE_ACCESSED) != 0) { 2975 mask = IN_ACCESS; 2976 } 2977 if ((flags & GOP_UPDATE_MODIFIED) != 0) { 2978 if (vp->v_type == VREG) { 2979 mask |= IN_CHANGE | IN_UPDATE; 2980 } else { 2981 mask |= IN_MODIFY; 2982 } 2983 } 2984 if (mask) { 2985 struct inode *ip = VTOI(vp); 2986 2987 ip->i_flag |= mask; 2988 } 2989 } 2990