/*	$NetBSD: genfs_vnops.c,v 1.23 2000/12/09 22:38:23 chs Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/poll.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pager.h>

#ifdef NFSSERVER
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>
#endif

int
genfs_poll(v)
	void *v;
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct proc *a_p;
	} */ *ap = v;

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

int
genfs_fsync(v)
	void *v;
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int wait;

	wait = (ap->a_flags & FSYNC_WAIT) != 0;
	vflushbuf(vp, wait);
	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
		return (0);
	else
		return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
}

int
genfs_seek(v)
	void *v;
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		struct ucred *a_ucred;
	} */ *ap = v;

	if (ap->a_newoff < 0)
		return (EINVAL);

	return (0);
}

int
genfs_abortop(v)
	void *v;
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
		PNBUF_PUT(ap->a_cnp->cn_pnbuf);
	return (0);
}

int
genfs_fcntl(v)
	void *v;
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;

	if (ap->a_command == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_badop(v)
	void *v;
{

	panic("genfs: bad op");
}

/*ARGSUSED*/
int
genfs_nullop(v)
	void *v;
{

	return (0);
}

/*ARGSUSED*/
int
genfs_einval(v)
	void *v;
{

	return (EINVAL);
}

/*ARGSUSED*/
int
genfs_eopnotsupp(v)
	void *v;
{

	return (EOPNOTSUPP);
}

/*
 * Called when an fs doesn't support a particular vop but the vop needs to
 * vrele, vput, or vunlock passed in vnodes.
 */
int
genfs_eopnotsupp_rele(v)
	void *v;
{
	struct vop_generic_args /*
		struct vnodeop_desc *a_desc;
		/ * other random data follows, presumably * /
	} */ *ap = v;
	struct vnodeop_desc *desc = ap->a_desc;
	struct vnode *vp;
	int flags, i, j, offset;

	/*
	 * Each successive vnode argument's WILL{PUT,UNLOCK,RELE} bits sit
	 * one position above the previous argument's, so masking against
	 * the VP0 constants while shifting "flags" right once per argument
	 * examines them all.
	 */
	flags = desc->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; flags >>= 1, i++) {
		if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
			break;	/* stop at end of list */
		if ((j = flags & VDESC_VP0_WILLPUT)) {
			vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
			switch (j) {
			case VDESC_VP0_WILLPUT:
				vput(vp);
				break;
			case VDESC_VP0_WILLUNLOCK:
				VOP_UNLOCK(vp, 0);
				break;
			case VDESC_VP0_WILLRELE:
				vrele(vp);
				break;
			}
		}
	}

	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{

	return (EBADF);
}

/* ARGSUSED */
int
genfs_enoioctl(v)
	void *v;
{

	return (ENOTTY);
}


/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
genfs_revoke(v)
	void *v;
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("genfs_revoke: not revokeall");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Lock the node.
 */
int
genfs_lock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
}

/*
 * Unlock the node.
 */
int
genfs_unlock(v)
	void *v;
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
	    &vp->v_interlock));
}

/*
 * Return whether or not the node is locked.
 */
int
genfs_islocked(v)
	void *v;
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockstatus(&vp->v_lock));
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 */
int
genfs_nolock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
}

int
genfs_nounlock(v)
	void *v;
{
	return (0);
}

int
genfs_noislocked(v)
	void *v;
{
	return (0);
}

/*
 * Local lease check for NFS servers.  Just set up args and let
 * nqsrv_getlease() do the rest.  If NFSSERVER is not in the kernel,
 * this is a null operation.
 */
int
genfs_lease_check(v)
	void *v;
{
#ifdef NFSSERVER
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap = v;
	u_int32_t duration = 0;
	int cache;
	u_quad_t frev;

	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
	return (0);
#else
	return (0);
#endif /* NFSSERVER */
}

/*
 * generic VM getpages routine.
 * Return PG_BUSY pages for the given range,
 * reading from backing store if necessary.
 */
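/*
 * In outline, genfs_getpages() below:
 *	1. bounds-checks the request against the vnode's current size;
 *	2. for PGO_LOCKED callers, returns only pages already resident;
 *	3. finds (or allocates) the pages covering the block-aligned range;
 *	4. for each block, either zeroes it (a hole) or reads it with a
 *	   buf (split into sub-bufs when the range is discontiguous) via
 *	   VOP_STRATEGY();
 *	5. for write faults over holes, allocates backing store with
 *	   VOP_BALLOCN();
 *	6. unbusies the pages the caller did not ask for and returns the
 *	   rest, possibly starting some readahead first.
 */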

int
genfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		vm_page_t *a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t eof, offset, origoffset, startoffset, endoffset, raoffset;
	daddr_t lbn, blkno;
	int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
	int fs_bshift, fs_bsize, dev_bshift, dev_bsize;
	int flags = ap->a_flags;
	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
	vaddr_t kva;
	struct buf *bp, *mbp;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct vm_page *pgs[16];		/* XXXUBC 16 */
	struct ucred *cred = curproc->p_ucred;	/* XXXUBC curproc */
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	boolean_t sawhole = FALSE;
	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);

	/* XXXUBC temp limit */
	if (*ap->a_count > 16) {
		return EINVAL;
	}

	error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
	if (error) {
		return error;
	}

#ifdef DIAGNOSTIC
	if (ap->a_centeridx < 0 || ap->a_centeridx > *ap->a_count) {
		panic("genfs_getpages: centeridx %d out of range",
		    ap->a_centeridx);
	}
	if (ap->a_offset & (PAGE_SIZE - 1) || ap->a_offset < 0) {
		panic("genfs_getpages: offset 0x%x", (int)ap->a_offset);
	}
	if (*ap->a_count < 0) {
		panic("genfs_getpages: count %d < 0", *ap->a_count);
	}
#endif

	/*
	 * Bounds-check the request.
	 */

	error = 0;
	origoffset = ap->a_offset;

	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= eof &&
	    (flags & PGO_PASTEOF) == 0) {
		if ((flags & PGO_LOCKED) == 0) {
			simple_unlock(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
		    origoffset, *ap->a_count, eof, 0);
		return EINVAL;
	}

	/*
	 * For PGO_LOCKED requests, just return whatever's in memory.
	 */

	if (flags & PGO_LOCKED) {
		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
		    UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);

		return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
	}

	/* vnode is VOP_LOCKed, uobj is locked */

	if (write && (vp->v_flag & VONWORKLST) == 0) {
		vn_syncer_add_to_worklist(vp, filedelay);
	}

	/*
	 * find the requested pages and make some simple checks.
	 * leave space in the page array for a whole block.
	 */

	fs_bshift = vp->v_mount->mnt_fs_bshift;
	fs_bsize = 1 << fs_bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;
	KASSERT((eof & (dev_bsize - 1)) == 0);

	orignpages = min(*ap->a_count,
	    round_page(eof - origoffset) >> PAGE_SHIFT);
	if (flags & PGO_PASTEOF) {
		orignpages = *ap->a_count;
	}
	npages = orignpages;
	startoffset = origoffset & ~(fs_bsize - 1);
	endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
	    + fs_bsize - 1) & ~(fs_bsize - 1));
	endoffset = min(endoffset, round_page(eof));
	ridx = (origoffset - startoffset) >> PAGE_SHIFT;

	memset(pgs, 0, sizeof(pgs));
	uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);

	/*
	 * if PGO_OVERWRITE is set, don't bother reading the pages.
	 * PGO_OVERWRITE also means that the caller guarantees
	 * that the pages already have backing store allocated.
	 */

	if (flags & PGO_OVERWRITE) {
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE", 0,0,0,0);

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				uvm_pagezero(pg);
				pg->flags &= ~(PG_FAKE);
			}
			pg->flags &= ~(PG_RDONLY);
		}
		goto out;
	}

	/*
	 * if the pages are already resident, just return them.
	 */

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[ridx + i];

		if ((pg->flags & PG_FAKE) ||
		    (write && (pg->flags & PG_RDONLY))) {
			break;
		}
	}
	if (i == npages) {
		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
		raoffset = origoffset + (orignpages << PAGE_SHIFT);
		goto raout;
	}

	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * find any additional pages needed to cover the expanded range.
	 */

	if (startoffset != origoffset) {

		/*
		 * XXXUBC we need to avoid deadlocks caused by locking
		 * additional pages at lower offsets than pages we
		 * already have locked.  for now, unlock them all and
		 * start over.
		 */

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				pg->flags |= PG_RELEASED;
			}
		}
		uvm_page_unbusy(&pgs[ridx], npages);
		memset(pgs, 0, sizeof(pgs));

		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
		    startoffset, endoffset, 0,0);
		npages = (endoffset - startoffset) >> PAGE_SHIFT;
		npgs = npages;
		uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
	}
	simple_unlock(&uobj->vmobjlock);

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = min(totalbytes, eof - startoffset);
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
	    UVMPAGER_MAPIN_READ);

	s = splbio();
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = totalbytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY | B_READ | (async ? B_CALL : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	/*
	 * if EOF is in the middle of the last page, zero the part past EOF.
	 */

	if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
		memset((void *)(kva + bytes), 0, tailbytes);
	}

	/*
	 * now loop over the pages, reading as needed.
	 */

	if (write) {
		lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL);
	} else {
		lockmgr(&vp->v_glock, LK_SHARED, NULL);
	}

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {

		/*
		 * skip pages which don't need to be read.
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		while ((pgs[pidx]->flags & PG_FAKE) == 0) {
			size_t b;

#ifdef DEBUG
			if (offset & (PAGE_SIZE - 1)) {
				panic("genfs_getpages: skipping from middle "
				    "of page");
			}
#endif

			b = min(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
			    offset, 0,0,0);
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o.  if bmap returns an error,
		 * skip the rest of the top-level i/o.
		 */

		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
			    lbn, error, 0,0);
			skipbytes += bytes;
			goto loopdone;
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary to avoid
		 * overwriting pages with valid data.
		 */

		iobytes = min(((lbn + 1 + run) << fs_bshift) - offset, bytes);
		if (offset + iobytes > round_page(offset)) {
			pcount = 1;
			while (pidx + pcount < npages &&
			    pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = min(iobytes, (pcount << PAGE_SHIFT) -
			    (offset - trunc_page(offset)));
		}

		/*
		 * if this block isn't allocated, zero it instead of reading it.
		 * if this is a read access, mark the pages we zeroed PG_RDONLY.
		 */

		if (blkno < 0) {
			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn, 0,0,0);

			sawhole = TRUE;
			memset((char *)kva + (offset - startoffset), 0,
			    iobytes);
			skipbytes += iobytes;

			if (!write) {
				int holepages =
				    (round_page(offset + iobytes) -
				    trunc_page(offset)) >> PAGE_SHIFT;
				for (i = 0; i < holepages; i++) {
					pgs[pidx + i]->flags |= PG_RDONLY;
				}
			}
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (char *)kva + offset - startoffset;
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_READ|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - (lbn << fs_bshift)) >>
		    dev_bshift);

		UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}

loopdone:
	if (skipbytes) {
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}

	if (async) {
		UVMHIST_LOG(ubchist, "returning PEND", 0,0,0,0);
		lockmgr(&vp->v_glock, LK_RELEASE, NULL);
		return EINPROGRESS;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	raoffset = offset;

	/*
	 * if we encountered a hole then we have to do a little more work.
	 * for read faults, we marked the page PG_RDONLY so that future
	 * write accesses to the page will fault again.
	 * for write faults, we must make sure that the backing store for
	 * the page is completely allocated while the pages are locked.
	 */

	if (error == 0 && sawhole && write) {
		error = VOP_BALLOCN(vp, startoffset, npages << PAGE_SHIFT,
		    cred, 0);
		if (error) {
			UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
			    lbn, error, 0,0);
			lockmgr(&vp->v_glock, LK_RELEASE, NULL);
			simple_lock(&uobj->vmobjlock);
			goto out;
		}
	}
	lockmgr(&vp->v_glock, LK_RELEASE, NULL);
	simple_lock(&uobj->vmobjlock);

	/*
	 * see if we want to start any readahead.
	 * XXXUBC for now, just read the next 128k on 64k boundaries.
	 * this is pretty nonsensical, but it is 50% faster than reading
	 * just the next 64k.
	 */

raout:
	if (!async && !write && ((int)raoffset & 0xffff) == 0 &&
	    PAGE_SHIFT <= 16) {
		int racount;

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);
	}

	/*
	 * we're almost done!  release the pages...
	 * for errors, we free the pages.
	 * otherwise we activate them and mark them as valid and clean.
	 * also, unbusy pages that were not actually requested.
	 */

out:
	if (error) {
		uvm_lock_pageq();
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pgs[i], pgs[i]->flags, 0,0);
			if ((pgs[i]->flags & PG_FAKE) == 0) {
				continue;
			}
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			uvm_pagefree(pgs[i]);
		}
		uvm_unlock_pageq();
		simple_unlock(&uobj->vmobjlock);
		UVMHIST_LOG(ubchist, "returning error %d", error, 0,0,0);
		return error;
	}

	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages, 0,0,0);
	for (i = 0; i < npages; i++) {
		if (pgs[i] == NULL) {
			continue;
		}
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
		    pgs[i], pgs[i]->flags, 0,0);
		if (pgs[i]->flags & PG_FAKE) {
			UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
			    pgs[i], pgs[i]->offset, 0,0);
			pgs[i]->flags &= ~(PG_FAKE);
			pmap_clear_modify(pgs[i]);
			pmap_clear_reference(pgs[i]);
		}
		if (write) {
			pgs[i]->flags &= ~(PG_RDONLY);
		}
		if (i < ridx || i >= ridx + orignpages || async) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
			    pgs[i], pgs[i]->offset, 0,0);
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			if (pgs[i]->wire_count == 0) {
				uvm_pageactivate(pgs[i]);
			}
			pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pgs[i], NULL);
		}
	}
	simple_unlock(&uobj->vmobjlock);
	if (ap->a_m != NULL) {
		memcpy(ap->a_m, &pgs[ridx],
		    orignpages * sizeof(struct vm_page *));
	}
	return 0;
}

/*
 * generic VM putpages routine.
 * Write the given range of pages to backing store.
 */
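/*
 * In outline: the pages (already selected and busied by the caller) are
 * mapped into kernel virtual memory, the range is walked block by block
 * with VOP_BMAP(), holes are skipped, and each contiguous piece is
 * written with a buf (or sub-bufs chained to a master buf) via
 * VOP_STRATEGY().  Asynchronous requests return EINPROGRESS as soon as
 * the i/o has been issued.
 */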

int
genfs_putpages(v)
	void *v;
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		struct vm_page **a_m;
		int a_count;
		int a_flags;
		int *a_rtvals;
	} */ *ap = v;

	int s, error, error2, npages, run;
	int fs_bshift, dev_bshift, dev_bsize;
	vaddr_t kva;
	off_t eof, offset, startoffset;
	size_t bytes, iobytes, skipbytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	struct buf *mbp, *bp;
	struct vnode *vp = ap->a_vp;
	boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0;
	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);

	simple_unlock(&vp->v_uvm.u_obj.vmobjlock);

	error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
	if (error) {
		return error;
	}

	error = error2 = 0;
	npages = ap->a_count;
	fs_bshift = vp->v_mount->mnt_fs_bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;
	KASSERT((eof & (dev_bsize - 1)) == 0);

	pg = ap->a_m[0];
	startoffset = pg->offset;
	bytes = min(npages << PAGE_SHIFT, eof - startoffset);
	skipbytes = 0;
	KASSERT(bytes != 0);

	kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK);

	s = splbio();
	vp->v_numoutput += 2;
	mbp = pool_get(&bufpool, PR_WAITOK);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	splx(s);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY | B_WRITE | B_AGE |
	    (async ? B_CALL : 0) |
	    (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error, 0,0,0);
			skipbytes += bytes;
			bytes = 0;
			break;
		}

		iobytes = min(((lbn + 1 + run) << fs_bshift) - offset, bytes);
		if (blkno == (daddr_t)-1) {
			skipbytes += iobytes;
			continue;
		}

		/* if it's really one i/o, don't make a second buf */
		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			vp->v_numoutput++;
			bp = pool_get(&bufpool, PR_WAITOK);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			splx(s);
			bp->b_data = (char *)kva +
			    (vaddr_t)(offset - pg->offset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - (lbn << fs_bshift)) >>
		    dev_bshift);
		UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    vp, offset, bp->b_bcount, bp->b_blkno);
		VOP_STRATEGY(bp);
	}
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
		s = splbio();
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	if (async) {
		UVMHIST_LOG(ubchist, "returning PEND", 0,0,0,0);
		return EINPROGRESS;
	}
	if (bp != NULL) {
		UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp, 0,0,0);
		error2 = biowait(mbp);
	}
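	/*
	 * The synchronous i/o (if any) has completed.  Give the
	 * soft-dependency hook a chance to post-process the master buf,
	 * then account for the completed write on the vnode, free the
	 * buf, and tear down the pager mapping.
	 */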
	if (bioops.io_pageiodone) {
		(*bioops.io_pageiodone)(mbp);
	}
	s = splbio();
	vwakeup(mbp);
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	UVMHIST_LOG(ubchist, "returning, error %d", error, 0,0,0);
	return error ? error : error2;
}

int
genfs_size(v)
	void *v;
{
	struct vop_size_args /* {
		struct vnode *a_vp;
		off_t a_size;
		off_t *a_eobp;
	} */ *ap = v;
	int bsize;

	bsize = 1 << ap->a_vp->v_mount->mnt_fs_bshift;
	*ap->a_eobp = (ap->a_size + bsize) & ~(bsize - 1);
	return 0;
}
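
#if 0
/*
 * Illustrative sketch only (not compiled): a file system typically wires
 * the genfs routines above into its vnode operations table for the vops
 * it does not implement itself.  "examplefs" and its table name are
 * hypothetical; the entry format follows the usual vnodeopv_entry_desc
 * convention.
 */
const struct vnodeopv_entry_desc examplefs_vnodeop_entries[] = {
	{ &vop_poll_desc, genfs_poll },			/* poll */
	{ &vop_fsync_desc, genfs_fsync },		/* fsync */
	{ &vop_seek_desc, genfs_seek },			/* seek */
	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
	{ &vop_lease_desc, genfs_lease_check },		/* lease */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_revoke_desc, genfs_revoke },		/* revoke */
	{ NULL, NULL }
};
#endif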