/*	$NetBSD: union_subr.c,v 1.2 2003/03/17 09:11:30 jdolecek Exp $	*/

/*
 * Copyright (c) 1994 Jan-Simon Pendry
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Jan-Simon Pendry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.2 2003/03/17 09:11:30 jdolecek Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/mount.h>
#include <sys/stat.h>

#include <uvm/uvm_extern.h>

#include <fs/union/union.h>

#ifdef DIAGNOSTIC
#include <sys/proc.h>
#endif

/* must be power of two, otherwise change UNION_HASH() */
#define NHASH 32

/* unsigned int ... */
#define UNION_HASH(u, l) \
	(((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))

static LIST_HEAD(unhead, union_node) unhead[NHASH];
static int unvplock[NHASH];

static int union_list_lock __P((int));
static void union_list_unlock __P((int));
void union_updatevp __P((struct union_node *, struct vnode *, struct vnode *));
static int union_relookup __P((struct union_mount *, struct vnode *,
			       struct vnode **, struct componentname *,
			       struct componentname *, const char *, int));
int union_vn_close __P((struct vnode *, int, struct ucred *, struct proc *));
static void union_dircache_r __P((struct vnode *, struct vnode ***, int *));
struct vnode *union_dircache __P((struct vnode *, struct proc *));

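/*
 * Initialize the hash chains which cache union_nodes, and the
 * flag words which lock them.
 */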
void
union_init()
{
	int i;

	for (i = 0; i < NHASH; i++)
		LIST_INIT(&unhead[i]);
	memset((caddr_t) unvplock, 0, sizeof(unvplock));
}

/*
 * Free global unionfs resources.
 */
void
union_done()
{

	/* Make sure to unset the readdir hook. */
	vn_union_readdir_hook = NULL;
}

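/*
 * Lock the hash chain (ix).  If the chain is already locked,
 * note that the lock is wanted, sleep until it is released, and
 * return 1 so the caller re-checks the chain.  Return 0 once the
 * lock has been taken.  union_list_unlock() releases the chain
 * and wakes up any waiters.
 */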
static int
union_list_lock(ix)
	int ix;
{

	if (unvplock[ix] & UN_LOCKED) {
		unvplock[ix] |= UN_WANTED;
		(void) tsleep(&unvplock[ix], PINOD, "unionlk", 0);
		return (1);
	}

	unvplock[ix] |= UN_LOCKED;

	return (0);
}

static void
union_list_unlock(ix)
	int ix;
{

	unvplock[ix] &= ~UN_LOCKED;

	if (unvplock[ix] & UN_WANTED) {
		unvplock[ix] &= ~UN_WANTED;
		wakeup((caddr_t) &unvplock[ix]);
	}
}

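/*
 * Install (uppervp) and (lowervp) as the upper and lower layer
 * vnodes of (un), releasing any vnodes they replace and moving
 * the union_node to its new hash chain.  The two chains involved
 * are locked in ascending order to avoid deadlock with a
 * concurrent update working in the opposite order.
 */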
void
union_updatevp(un, uppervp, lowervp)
	struct union_node *un;
	struct vnode *uppervp;
	struct vnode *lowervp;
{
	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
	int nhash = UNION_HASH(uppervp, lowervp);
	int docache = (lowervp != NULLVP || uppervp != NULLVP);
	int lhash, uhash;

	/*
	 * Ensure locking is ordered from lower to higher
	 * to avoid deadlocks.
	 */
	if (nhash < ohash) {
		lhash = nhash;
		uhash = ohash;
	} else {
		lhash = ohash;
		uhash = nhash;
	}

	if (lhash != uhash)
		while (union_list_lock(lhash))
			continue;

	while (union_list_lock(uhash))
		continue;

	if (ohash != nhash || !docache) {
		if (un->un_flags & UN_CACHED) {
			un->un_flags &= ~UN_CACHED;
			LIST_REMOVE(un, un_cache);
		}
	}

	if (ohash != nhash)
		union_list_unlock(ohash);

	if (un->un_lowervp != lowervp) {
		if (un->un_lowervp) {
			vrele(un->un_lowervp);
			if (un->un_path) {
				free(un->un_path, M_TEMP);
				un->un_path = 0;
			}
			if (un->un_dirvp) {
				vrele(un->un_dirvp);
				un->un_dirvp = NULLVP;
			}
		}
		un->un_lowervp = lowervp;
		un->un_lowersz = VNOVAL;
	}

	if (un->un_uppervp != uppervp) {
		if (un->un_uppervp)
			vrele(un->un_uppervp);

		un->un_uppervp = uppervp;
		un->un_uppersz = VNOVAL;
	}

	if (docache && (ohash != nhash)) {
		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	union_list_unlock(nhash);
}

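/*
 * Convenience wrappers around union_updatevp() which replace
 * just the lower or just the upper layer vnode of (un).
 */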
void
union_newlower(un, lowervp)
	struct union_node *un;
	struct vnode *lowervp;
{

	union_updatevp(un, un->un_uppervp, lowervp);
}

void
union_newupper(un, uppervp)
	struct union_node *un;
	struct vnode *uppervp;
{

	union_updatevp(un, uppervp, un->un_lowervp);
}

/*
 * Keep track of size changes in the underlying vnodes.
 * If the size changes, then call back to the vm layer
 * giving priority to the upper layer size.
 */
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *un;
	off_t sz;

	/* only interested in regular files */
	if (vp->v_type != VREG)
		return;

	un = VTOUNION(vp);
	sz = VNOVAL;

	if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) {
		un->un_uppersz = uppersz;
		if (sz == VNOVAL)
			sz = un->un_uppersz;
	}

	if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) {
		un->un_lowersz = lowersz;
		if (sz == VNOVAL)
			sz = un->un_lowersz;
	}

	if (sz != VNOVAL) {
#ifdef UNION_DIAGNOSTIC
		printf("union: %s size now %qd\n",
		    uppersz != VNOVAL ? "upper" : "lower", sz);
#endif
		uvm_vnp_setsize(vp, sz);
	}
}

/*
 * allocate a union_node/vnode pair.  the vnode is
 * referenced and locked.  the new vnode is returned
 * via (vpp).  (mp) is the mountpoint of the union filesystem,
 * (dvp) is the parent directory where the upper layer object
 * should exist (but doesn't) and (cnp) is the componentname
 * information which is partially copied to allow the upper
 * layer object to be created at a later time.  (uppervp)
 * and (lowervp) reference the upper and lower layer objects
 * being mapped.  either, but not both, can be nil.
 * if supplied, (uppervp) is locked.
 * the reference is either maintained in the new union_node
 * object which is allocated, or they are vrele'd.
 *
 * all union_nodes are maintained on a singly-linked
 * list.  new nodes are only allocated when they cannot
 * be found on this list.  entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * a single lock is kept for the entire list.  this is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  this lock is only taken if we are going to
 * call getnewvnode, since the kernel itself is single-threaded.
 *
 * if an entry is found on the list, then call vget() to
 * take a reference.  this is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 */
int
union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *undvp;		/* parent union vnode */
	struct vnode *dvp;		/* may be null */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = NULL;
	struct vnode *xlowervp = NULLVP;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		xlowervp = lowervp;
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = VLAYER;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VROOT;
	}

loop:
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		for (un = unhead[hash].lh_first; un != 0;
			un = un->un_cache.le_next) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				if (vget(UNIONTOV(un), 0)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.
		 * uppervp is locked, though un->un_uppervp
		 * may not be.  this doesn't break the locking
		 * hierarchy since in the case that un->un_uppervp
		 * is not yet locked it will be vrele'd and replaced
		 * with uppervp.
		 */

		if ((dvp != NULLVP) && (uppervp == dvp)) {
			/*
			 * Access ``.'', so (un) will already
			 * be locked.  Since this process has
			 * the lock on (uppervp) no other
			 * process can hold the lock on (un).
			 */
#ifdef DIAGNOSTIC
			if ((un->un_flags & UN_LOCKED) == 0)
				panic("union: . not locked");
			else if (curproc && un->un_pid != curproc->p_pid &&
				    un->un_pid > -1 && curproc->p_pid > -1)
				panic("union: allocvp not lock owner");
#endif
		} else {
			if (un->un_flags & UN_LOCKED) {
				vrele(UNIONTOV(un));
				un->un_flags |= UN_WANTED;
				(void) tsleep(&un->un_flags, PINOD,
				    "unionalloc", 0);
				goto loop;
			}
			un->un_flags |= UN_LOCKED;

#ifdef DIAGNOSTIC
			if (curproc)
				un->un_pid = curproc->p_pid;
			else
				un->un_pid = -1;
#endif
		}

		/*
		 * At this point, the union_node is locked,
		 * un->un_uppervp may not be locked, and uppervp
		 * is locked or nil.
		 */

		/*
		 * Save information about the upper layer.
		 */
		if (uppervp != un->un_uppervp) {
			union_newupper(un, uppervp);
		} else if (uppervp) {
			vrele(uppervp);
		}

		if (un->un_uppervp) {
			un->un_flags |= UN_ULOCK;
			un->un_flags &= ~UN_KLOCK;
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				un->un_hash = cnp->cn_hash;
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				memcpy(un->un_path, cnp->cn_nameptr,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
				VREF(dvp);
				un->un_dirvp = dvp;
			}
		} else if (lowervp) {
			vrele(lowervp);
		}
		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * otherwise lock the vp list while we call getnewvnode
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		if (union_list_lock(hash))
			goto loop;
	}

	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
	if (error) {
		if (uppervp) {
			if (dvp == uppervp)
				vrele(uppervp);
			else
				vput(uppervp);
		}
		if (lowervp)
			vrele(lowervp);

		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	(*vpp)->v_flag |= vflag;
	(*vpp)->v_vnlock = NULL;	/* Make upper layers call VOP_LOCK */
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;
	un = VTOUNION(*vpp);
	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_pvp = undvp;
	if (undvp != NULLVP)
		VREF(undvp);
	un->un_dircache = 0;
	un->un_openl = 0;
	un->un_flags = UN_LOCKED;
	if (un->un_uppervp)
		un->un_flags |= UN_ULOCK;
#ifdef DIAGNOSTIC
	if (curproc)
		un->un_pid = curproc->p_pid;
	else
		un->un_pid = -1;
#endif
	if (cnp && (lowervp != NULLVP)) {
		un->un_hash = cnp->cn_hash;
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		memcpy(un->un_path, cnp->cn_nameptr, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
		VREF(dvp);
		un->un_dirvp = dvp;
	} else {
		un->un_hash = 0;
		un->un_path = 0;
		un->un_dirvp = 0;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

	if (xlowervp)
		vrele(xlowervp);

out:
	if (docache)
		union_list_unlock(hash);

	return (error);
}

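/*
 * Free the union_node attached to (vp), releasing its hash chain
 * entry, its references to the parent, upper, lower and saved
 * directory vnodes, and its copy of the pathname.  Called from
 * the vfs reclaim entry.
 */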
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP)
		vrele(un->un_pvp);
	if (un->un_uppervp != NULLVP)
		vrele(un->un_uppervp);
	if (un->un_lowervp != NULLVP)
		vrele(un->un_lowervp);
	if (un->un_dirvp != NULLVP)
		vrele(un->un_dirvp);
	if (un->un_path)
		free(un->un_path, M_TEMP);

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.  copy the vnode (fvp) to the vnode (tvp)
 * using a sequence of reads and writes.  both (fvp)
 * and (tvp) are locked on entry and exit.
 */
int
union_copyfile(fvp, tvp, cred, p)
	struct vnode *fvp;
	struct vnode *tvp;
	struct ucred *cred;
	struct proc *p;
{
	char *buf;
	struct uio uio;
	struct iovec iov;
	int error = 0;

	/*
	 * strategy:
	 * allocate a buffer of size MAXBSIZE.
	 * loop doing reads and writes, keeping track
	 * of the current uio offset.
	 * give up at the first sign of trouble.
	 */

	uio.uio_procp = p;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_offset = 0;

	VOP_UNLOCK(fvp, 0);			/* XXX */
	VOP_LEASE(fvp, p, cred, LEASE_READ);
	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
	VOP_UNLOCK(tvp, 0);			/* XXX */
	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */

	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);

	/* ugly loop follows... */
	do {
		off_t offset = uio.uio_offset;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		iov.iov_base = buf;
		iov.iov_len = MAXBSIZE;
		uio.uio_resid = iov.iov_len;
		uio.uio_rw = UIO_READ;
		error = VOP_READ(fvp, &uio, 0, cred);

		if (error == 0) {
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			iov.iov_base = buf;
			iov.iov_len = MAXBSIZE - uio.uio_resid;
			uio.uio_offset = offset;
			uio.uio_rw = UIO_WRITE;
			uio.uio_resid = iov.iov_len;

			if (uio.uio_resid == 0)
				break;

			do {
				error = VOP_WRITE(tvp, &uio, 0, cred);
			} while ((uio.uio_resid > 0) && (error == 0));
		}

	} while (error == 0);

	free(buf, M_TEMP);
	return (error);
}

/*
 * (un) is assumed to be locked on entry and remains
 * locked on exit.
 */
int
union_copyup(un, docopy, cred, p)
	struct union_node *un;
	int docopy;
	struct ucred *cred;
	struct proc *p;
{
	int error;
	struct vnode *lvp, *uvp;
	struct vattr lvattr, uvattr;

	error = union_vn_create(&uvp, un, p);
	if (error)
		return (error);

	/* at this point, uppervp is locked */
	union_newupper(un, uvp);
	un->un_flags |= UN_ULOCK;

	lvp = un->un_lowervp;

	if (docopy) {
		/*
		 * XXX - should not ignore errors
		 * from VOP_CLOSE
		 */
		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);

		error = VOP_GETATTR(lvp, &lvattr, cred, p);
		if (error == 0)
			error = VOP_OPEN(lvp, FREAD, cred, p);
		if (error == 0) {
			error = union_copyfile(lvp, uvp, cred, p);
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
		}
		if (error == 0) {
			/* Copy permissions up too */
			VATTR_NULL(&uvattr);
			uvattr.va_mode = lvattr.va_mode;
			uvattr.va_flags = lvattr.va_flags;
			error = VOP_SETATTR(uvp, &uvattr, cred, p);
		}
		VOP_UNLOCK(lvp, 0);
#ifdef UNION_DIAGNOSTIC
		if (error == 0)
			uprintf("union: copied up %s\n", un->un_path);
#endif

	}
	union_vn_close(uvp, FWRITE, cred, p);

	/*
	 * Subsequent IOs will go to the top layer, so
	 * call close on the lower vnode and open on the
	 * upper vnode to ensure that the filesystem keeps
	 * its reference counts right.  This doesn't do
	 * the right thing with (cred) and (FREAD) though.
	 * Ignoring error returns is not right, either.
	 */
	if (error == 0) {
		int i;

		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
		for (i = 0; i < un->un_openl; i++) {
			(void) VOP_CLOSE(lvp, FREAD, cred, p);
			(void) VOP_OPEN(uvp, FREAD, cred, p);
		}
		un->un_openl = 0;
		VOP_UNLOCK(lvp, 0);
	}

	return (error);
}

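/*
 * Re-run a lookup for (path) of length (pathlen) in the upper
 * layer directory (dvp), faking up a componentname (cn) from
 * (cnp) as described below.  Used by union_mkshadow() and
 * union_mkwhiteout() to locate the name to be created.
 */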
static int
union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
	struct union_mount *um;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
	struct componentname *cn;
	const char *path;
	int pathlen;
{
	int error;

	/*
	 * A new componentname structure must be faked up because
	 * there is no way to know where the upper level cnp came
	 * from or what it is being used for.  This must duplicate
	 * some of the work done by NDINIT, some of the work done
	 * by namei, some of the work done by lookup and some of
	 * the work done by VOP_LOOKUP when given a CREATE flag.
	 * Conclusion: Horrible.
	 *
	 * The pathname buffer will be PNBUF_PUT'd by VOP_MKDIR.
	 */
	cn->cn_namelen = pathlen;
	if ((cn->cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn->cn_pnbuf = PNBUF_GET();
	memcpy(cn->cn_pnbuf, path, cn->cn_namelen);
	cn->cn_pnbuf[cn->cn_namelen] = '\0';

	cn->cn_nameiop = CREATE;
	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn->cn_proc = cnp->cn_proc;
	if (um->um_op == UNMNT_ABOVE)
		cn->cn_cred = cnp->cn_cred;
	else
		cn->cn_cred = um->um_cred;
	cn->cn_nameptr = cn->cn_pnbuf;
	cn->cn_hash = cnp->cn_hash;
	cn->cn_consume = cnp->cn_consume;

	VREF(dvp);
	error = relookup(dvp, vpp, cn);
	if (!error)
		vrele(dvp);
	else {
		PNBUF_PUT(cn->cn_pnbuf);
		cn->cn_pnbuf = 0;
	}

	return (error);
}

/*
 * Create a shadow directory in the upper layer.
 * The new vnode is returned locked.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the shadow directory.
 * it is unlocked on entry and exit.
 * (cnp) is the componentname to be created.
 * (vpp) is the returned newly created shadow directory, which
 * is returned locked.
 *
 * N.B. We still attempt to create shadow directories even if the union
 * is mounted read-only, which is a little nonintuitive.
 */
int
union_mkshadow(um, dvp, cnp, vpp)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	struct vnode **vpp;
{
	int error;
	struct vattr va;
	struct proc *p = cnp->cn_proc;
	struct componentname cn;

	error = union_relookup(um, dvp, vpp, cnp, &cn,
			cnp->cn_nameptr, cnp->cn_namelen);
	if (error)
		return (error);

	if (*vpp) {
		VOP_ABORTOP(dvp, &cn);
		VOP_UNLOCK(dvp, 0);
		vrele(*vpp);
		*vpp = NULLVP;
		return (EEXIST);
	}

	/*
	 * policy: when creating the shadow directory in the
	 * upper layer, create it owned by the user who did
	 * the mount, group from parent directory, and mode
	 * 777 modified by umask (ie mostly identical to the
	 * mkdir syscall).  (jsp, kb)
	 */

	VATTR_NULL(&va);
	va.va_type = VDIR;
	va.va_mode = um->um_cmode;

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);

	error = VOP_MKDIR(dvp, vpp, &cn, &va);
	return (error);
}

/*
 * Create a whiteout entry in the upper layer.
 *
 * (um) points to the union mount structure for access to
 * the mounting process's credentials.
 * (dvp) is the directory in which to create the whiteout.
 * it is locked on entry and exit.
 * (cnp) is the componentname to be created.
 */
int
union_mkwhiteout(um, dvp, cnp, path)
	struct union_mount *um;
	struct vnode *dvp;
	struct componentname *cnp;
	char *path;
{
	int error;
	struct proc *p = cnp->cn_proc;
	struct vnode *wvp;
	struct componentname cn;

	VOP_UNLOCK(dvp, 0);
	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
	if (error) {
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		return (error);
	}

	if (wvp) {
		VOP_ABORTOP(dvp, &cn);
		vrele(dvp);
		vrele(wvp);
		return (EEXIST);
	}

	/* VOP_LEASE: dvp is locked */
	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);

	error = VOP_WHITEOUT(dvp, &cn, CREATE);
	if (error)
		VOP_ABORTOP(dvp, &cn);

	vrele(dvp);

	return (error);
}

/*
 * union_vn_create: creates and opens a new shadow file
 * on the upper union layer.  this function is similar
 * in spirit to calling vn_open but it avoids calling namei().
 * the problem with calling namei is that a) it locks too many
 * things, and b) it doesn't start at the "right" directory,
 * whereas relookup is told where to start.
 */
int
union_vn_create(vpp, un, p)
	struct vnode **vpp;
	struct union_node *un;
	struct proc *p;
{
	struct vnode *vp;
	struct ucred *cred = p->p_ucred;
	struct vattr vat;
	struct vattr *vap = &vat;
	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
	int error;
	int cmode = UN_FILEMODE & ~p->p_cwdi->cwdi_cmask;
	struct componentname cn;

	*vpp = NULLVP;

	/*
	 * Build a new componentname structure (for the same
	 * reasons outlined in union_mkshadow).
	 * The difference here is that the file is owned by
	 * the current user, rather than by the person who
	 * did the mount, since the current user needs to be
	 * able to write the file (that's why it is being
	 * copied in the first place).
	 */
	cn.cn_namelen = strlen(un->un_path);
	if ((cn.cn_namelen + 1) > MAXPATHLEN)
		return (ENAMETOOLONG);
	cn.cn_pnbuf = PNBUF_GET();
	memcpy(cn.cn_pnbuf, un->un_path, cn.cn_namelen+1);
	cn.cn_nameiop = CREATE;
	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
	cn.cn_proc = p;
	cn.cn_cred = p->p_ucred;
	cn.cn_nameptr = cn.cn_pnbuf;
	cn.cn_hash = un->un_hash;
	cn.cn_consume = 0;

	VREF(un->un_dirvp);
	if ((error = relookup(un->un_dirvp, &vp, &cn)) != 0) {
		/* free the pathname buffer, as union_relookup() does */
		PNBUF_PUT(cn.cn_pnbuf);
		return (error);
	}
	vrele(un->un_dirvp);

	if (vp) {
		VOP_ABORTOP(un->un_dirvp, &cn);
		if (un->un_dirvp == vp)
			vrele(un->un_dirvp);
		else
			vput(un->un_dirvp);
		vrele(vp);
		return (EEXIST);
	}

	/*
	 * Good - there was no race to create the file
	 * so go ahead and create it.  The permissions
	 * on the file will be 0666 modified by the
	 * current user's umask.  Access to the file, while
	 * it is unioned, will require access to the top *and*
	 * bottom files.  Access when not unioned will simply
	 * require access to the top-level file.
	 * TODO: confirm choice of access permissions.
	 */
	VATTR_NULL(vap);
	vap->va_type = VREG;
	vap->va_mode = cmode;
	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
	if ((error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap)) != 0)
		return (error);

	if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0) {
		vput(vp);
		return (error);
	}

	vp->v_writecount++;
	*vpp = vp;
	return (0);
}

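/*
 * Close a vnode opened on behalf of the union layer, dropping
 * the write count which union_vn_create() took if the file was
 * open for writing.
 */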
int
union_vn_close(vp, fmode, cred, p)
	struct vnode *vp;
	int fmode;
	struct ucred *cred;
	struct proc *p;
{

	if (fmode & FWRITE)
		--vp->v_writecount;
	return (VOP_CLOSE(vp, fmode, cred, p));
}

void
union_removed_upper(un)
	struct union_node *un;
{
#if 1
	/*
	 * We do not set the uppervp to NULLVP here, because lowervp
	 * may also be NULLVP, so this routine would end up creating
	 * a bogus union node with no upper or lower VP (that causes
	 * pain in many places that assume at least one VP exists).
	 * Since we've removed this node from the cache hash chains,
	 * it won't be found again.  When all current holders
	 * release it, union_inactive() will vgone() it.
	 */
	union_diruncache(un);
#else
	union_newupper(un, NULLVP);
#endif

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_flags & UN_ULOCK) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0);
	}
}

#if 0
struct vnode *
union_lowervp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if ((un->un_lowervp != NULLVP) &&
	    (vp->v_type == un->un_lowervp->v_type)) {
		if (vget(un->un_lowervp, 0) == 0)
			return (un->un_lowervp);
	}

	return (NULLVP);
}
#endif

/*
 * determine whether a whiteout is needed
 * during a remove/rmdir operation.
 */
int
union_dowhiteout(un, cred, p)
	struct union_node *un;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	if (un->un_lowervp != NULLVP)
		return (1);

	if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 &&
	    (va.va_flags & OPAQUE))
		return (1);

	return (0);
}

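/*
 * Walk the union stack rooted at (vp), collecting a reference to
 * each non-union vnode into the table at (*vppp).  When (vppp)
 * is nil, only count the vnodes into (*cntp) so the caller can
 * size the table.
 */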
static void
union_dircache_r(vp, vppp, cntp)
	struct vnode *vp;
	struct vnode ***vppp;
	int *cntp;
{
	struct union_node *un;

	if (vp->v_op != union_vnodeop_p) {
		if (vppp) {
			VREF(vp);
			*(*vppp)++ = vp;
			if (--(*cntp) == 0)
				panic("union: dircache table too small");
		} else {
			(*cntp)++;
		}

		return;
	}

	un = VTOUNION(vp);
	if (un->un_uppervp != NULLVP)
		union_dircache_r(un->un_uppervp, vppp, cntp);
	if (un->un_lowervp != NULLVP)
		union_dircache_r(un->un_lowervp, vppp, cntp);
}

struct vnode *
union_dircache(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	int cnt;
	struct vnode *nvp = NULLVP;
	struct vnode **vpp;
	struct vnode **dircache;
	int error;

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	dircache = VTOUNION(vp)->un_dircache;

	nvp = NULLVP;

	if (dircache == 0) {
		cnt = 0;
		union_dircache_r(vp, 0, &cnt);
		cnt++;
		dircache = (struct vnode **)
				malloc(cnt * sizeof(struct vnode *),
					M_TEMP, M_WAITOK);
		vpp = dircache;
		union_dircache_r(vp, &vpp, &cnt);
		VTOUNION(vp)->un_dircache = dircache;
		*vpp = NULLVP;
		vpp = dircache + 1;
	} else {
		vpp = dircache;
		do {
			if (*vpp++ == VTOUNION(vp)->un_uppervp)
				break;
		} while (*vpp != NULLVP);
	}

	if (*vpp == NULLVP)
		goto out;

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	VREF(*vpp);
	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0,
	    *vpp, NULLVP, 0);
	if (!error) {
		VTOUNION(vp)->un_dircache = 0;
		VTOUNION(nvp)->un_dircache = dircache;
	}

out:
	VOP_UNLOCK(vp, 0);
	return (nvp);
}

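/*
 * Release the directory cache built by union_dircache(),
 * dropping the reference held on each of its vnodes.
 */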
void
union_diruncache(un)
	struct union_node *un;
{
	struct vnode **vpp;

	if (un->un_dircache != 0) {
		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
			vrele(*vpp);
		free(un->un_dircache, M_TEMP);
		un->un_dircache = 0;
	}
}

/*
 * This hook is called from vn_readdir() to switch to the lower
 * directory after the upper directory has been read.
 */
int
union_readdirhook(struct vnode **vpp, struct file *fp, struct proc *p)
{
	struct vnode *vp = *vpp, *lvp;
	struct vattr va;
	int error;

	if (vp->v_op != union_vnodeop_p)
		return (0);

	if ((lvp = union_dircache(vp, p)) == NULLVP)
		return (0);

	/*
	 * If the directory is opaque,
	 * then don't show lower entries
	 */
	error = VOP_GETATTR(vp, &va, fp->f_cred, p);
	if (error || (va.va_flags & OPAQUE)) {
		vput(lvp);
		return (error);
	}

	error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
	if (error) {
		vput(lvp);
		return (error);
	}
	VOP_UNLOCK(lvp, 0);
	fp->f_data = (caddr_t) lvp;
	fp->f_offset = 0;
	error = vn_close(vp, FREAD, fp->f_cred, p);
	if (error)
		return (error);
	*vpp = lvp;
	return (0);
}