/*	$NetBSD: genfs_vnops.c,v 1.24 2000/12/27 04:47:43 chs Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/poll.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pager.h>

#ifdef NFSSERVER
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>
#endif

int
genfs_poll(v)
	void *v;
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct proc *a_p;
	} */ *ap = v;

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

int
genfs_fsync(v)
	void *v;
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		off_t offlo;
		off_t offhi;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int wait;

	wait = (ap->a_flags & FSYNC_WAIT) != 0;
	vflushbuf(vp, wait);
	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
		return (0);
	else
		return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
}
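
/*
 * Illustrative sketch (not part of this file): a file system typically
 * plugs these genfs_* defaults into its vnodeop table and supplies its
 * own routines only where it needs different behaviour.  The "examplefs"
 * names below are hypothetical.
 *
 *	const struct vnodeopv_entry_desc examplefs_vnodeop_entries[] = {
 *		{ &vop_default_desc, vn_default_error },
 *		{ &vop_poll_desc, genfs_poll },
 *		{ &vop_fsync_desc, examplefs_fsync },
 *		{ &vop_seek_desc, genfs_seek },
 *		{ &vop_lock_desc, genfs_lock },
 *		{ &vop_unlock_desc, genfs_unlock },
 *		{ &vop_islocked_desc, genfs_islocked },
 *		{ &vop_getpages_desc, genfs_getpages },
 *		{ &vop_putpages_desc, genfs_putpages },
 *		{ NULL, NULL }
 *	};
 */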
int
genfs_seek(v)
	void *v;
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		struct ucred *a_ucred;
	} */ *ap = v;

	if (ap->a_newoff < 0)
		return (EINVAL);

	return (0);
}

int
genfs_abortop(v)
	void *v;
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
		PNBUF_PUT(ap->a_cnp->cn_pnbuf);
	return (0);
}

int
genfs_fcntl(v)
	void *v;
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;

	if (ap->a_command == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_badop(v)
	void *v;
{

	panic("genfs: bad op");
}

/*ARGSUSED*/
int
genfs_nullop(v)
	void *v;
{

	return (0);
}

/*ARGSUSED*/
int
genfs_einval(v)
	void *v;
{

	return (EINVAL);
}

/*ARGSUSED*/
int
genfs_eopnotsupp(v)
	void *v;
{

	return (EOPNOTSUPP);
}

/*
 * Called when an fs doesn't support a particular vop but the vop needs to
 * vrele, vput, or vunlock passed in vnodes.
 */
int
genfs_eopnotsupp_rele(v)
	void *v;
{
	struct vop_generic_args /*
		struct vnodeop_desc *a_desc;
		/ * other random data follows, presumably * /
	} */ *ap = v;
	struct vnodeop_desc *desc = ap->a_desc;
	struct vnode *vp;
	int flags, i, j, offset;

	flags = desc->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; flags >>= 1, i++) {
		if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
			break;	/* stop at end of list */
		if ((j = flags & VDESC_VP0_WILLPUT)) {
			vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
			switch (j) {
			case VDESC_VP0_WILLPUT:
				vput(vp);
				break;
			case VDESC_VP0_WILLUNLOCK:
				VOP_UNLOCK(vp, 0);
				break;
			case VDESC_VP0_WILLRELE:
				vrele(vp);
				break;
			}
		}
	}

	return (EOPNOTSUPP);
}
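
/*
 * Example (illustrative only): whether an unimplemented operation should
 * point at genfs_eopnotsupp or at genfs_eopnotsupp_rele depends on whether
 * the operation's descriptor promises to vput/vrele/unlock its vnode
 * arguments.  For an op such as VOP_RENAME, which does, a file system
 * might use
 *
 *	{ &vop_rename_desc, genfs_eopnotsupp_rele },
 *
 * so that the caller's vnode references are not leaked even though the
 * operation itself fails with EOPNOTSUPP.
 */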
/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{

	return (EBADF);
}

/* ARGSUSED */
int
genfs_enoioctl(v)
	void *v;
{

	return (ENOTTY);
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
genfs_revoke(v)
	void *v;
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("genfs_revoke: not revokeall");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Lock the node.
 */
int
genfs_lock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
}

/*
 * Unlock the node.
 */
int
genfs_unlock(v)
	void *v;
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
	    &vp->v_interlock));
}

/*
 * Return whether or not the node is locked.
 */
int
genfs_islocked(v)
	void *v;
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockstatus(&vp->v_lock));
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 */
int
genfs_nolock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
}

int
genfs_nounlock(v)
	void *v;
{
	return (0);
}

int
genfs_noislocked(v)
	void *v;
{
	return (0);
}

/*
 * Local lease check for NFS servers.  Just set up args and let
 * nqsrv_getlease() do the rest.  If NFSSERVER is not in the kernel,
 * this is a null operation.
 */
int
genfs_lease_check(v)
	void *v;
{
#ifdef NFSSERVER
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap = v;
	u_int32_t duration = 0;
	int cache;
	u_quad_t frev;

	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
	return (0);
#else
	return (0);
#endif /* NFSSERVER */
}
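
/*
 * A rough sketch of how the pager entry point below is reached
 * (illustrative only; the actual callers live in the UVM/UBC code,
 * e.g. uvn_get() for faults and ubc_fault() for ubc_alloc() windows):
 *
 *	simple_lock(&vp->v_uvm.u_obj.vmobjlock);
 *	npages = 1;
 *	error = VOP_GETPAGES(vp, trunc_page(offset), pgs, &npages, 0,
 *	    access_type, advice, PGO_SYNCIO);
 *
 * On success the pages come back PG_BUSY and, for non-PGO_LOCKED calls,
 * the object lock has been dropped by the time we return.
 */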
/*
 * generic VM getpages routine.
 * Return PG_BUSY pages for the given range,
 * reading from backing store if necessary.
 */

int
genfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		vm_page_t *a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t eof, offset, origoffset, startoffset, endoffset, raoffset;
	daddr_t lbn, blkno;
	int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
	int fs_bshift, fs_bsize, dev_bshift, dev_bsize;
	int flags = ap->a_flags;
	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
	vaddr_t kva;
	struct buf *bp, *mbp;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct vm_page *pgs[16];			/* XXXUBC 16 */
	struct ucred *cred = curproc->p_ucred;		/* XXXUBC curproc */
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	boolean_t sawhole = FALSE;
	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);

	/* XXXUBC temp limit */
	if (*ap->a_count > 16) {
		return EINVAL;
	}

	error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
	if (error) {
		return error;
	}

#ifdef DIAGNOSTIC
	if (ap->a_centeridx < 0 || ap->a_centeridx > *ap->a_count) {
		panic("genfs_getpages: centeridx %d out of range",
		    ap->a_centeridx);
	}
	if (ap->a_offset & (PAGE_SIZE - 1) || ap->a_offset < 0) {
		panic("genfs_getpages: offset 0x%x", (int)ap->a_offset);
	}
	if (*ap->a_count < 0) {
		panic("genfs_getpages: count %d < 0", *ap->a_count);
	}
#endif

	/*
	 * Bounds-check the request.
	 */

	error = 0;
	origoffset = ap->a_offset;

	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= eof &&
	    (flags & PGO_PASTEOF) == 0) {
		if ((flags & PGO_LOCKED) == 0) {
			simple_unlock(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
		    origoffset, *ap->a_count, eof, 0);
		return EINVAL;
	}

	/*
	 * For PGO_LOCKED requests, just return whatever's in memory.
	 */

	if (flags & PGO_LOCKED) {
		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
		    UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);

		return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
	}

	/* vnode is VOP_LOCKed, uobj is locked */

	if (write && (vp->v_flag & VONWORKLST) == 0) {
		vn_syncer_add_to_worklist(vp, filedelay);
	}

	/*
	 * find the requested pages and make some simple checks.
	 * leave space in the page array for a whole block.
	 */

	fs_bshift = vp->v_mount->mnt_fs_bshift;
	fs_bsize = 1 << fs_bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;
	KASSERT((eof & (dev_bsize - 1)) == 0);

	orignpages = min(*ap->a_count,
	    round_page(eof - origoffset) >> PAGE_SHIFT);
	if (flags & PGO_PASTEOF) {
		orignpages = *ap->a_count;
	}
	npages = orignpages;
	startoffset = origoffset & ~(fs_bsize - 1);
	endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
	    + fs_bsize - 1) & ~(fs_bsize - 1));
	endoffset = min(endoffset, round_page(eof));
	ridx = (origoffset - startoffset) >> PAGE_SHIFT;
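
	/*
	 * For example (illustrative numbers only): with 4k pages and an 8k
	 * file system block size, a request for 2 pages at offset 0x3000
	 * (and an EOF beyond 0x6000) gives startoffset 0x2000, endoffset
	 * 0x6000 and ridx 1, i.e. the block-aligned range covers 4 pages
	 * and the pages actually asked for end up at pgs[1] and pgs[2].
	 */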
	memset(pgs, 0, sizeof(pgs));
	uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);

	/*
	 * if PGO_OVERWRITE is set, don't bother reading the pages.
	 * PGO_OVERWRITE also means that the caller guarantees
	 * that the pages already have backing store allocated.
	 */

	if (flags & PGO_OVERWRITE) {
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE", 0,0,0,0);

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				uvm_pagezero(pg);
				pg->flags &= ~(PG_FAKE);
			}
			pg->flags &= ~(PG_RDONLY);
		}
		goto out;
	}

	/*
	 * if the pages are already resident, just return them.
	 */

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[ridx + i];

		if ((pg->flags & PG_FAKE) ||
		    (write && (pg->flags & PG_RDONLY))) {
			break;
		}
	}
	if (i == npages) {
		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
		raoffset = origoffset + (orignpages << PAGE_SHIFT);
		goto raout;
	}

	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * find any additional pages needed to cover the expanded range.
	 */

	if (startoffset != origoffset) {

		/*
		 * XXXUBC we need to avoid deadlocks caused by locking
		 * additional pages at lower offsets than pages we
		 * already have locked.  for now, unlock them all and
		 * start over.
		 */

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				pg->flags |= PG_RELEASED;
			}
		}
		uvm_page_unbusy(&pgs[ridx], npages);
		memset(pgs, 0, sizeof(pgs));

		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
		    startoffset, endoffset, 0,0);
		npages = (endoffset - startoffset) >> PAGE_SHIFT;
		npgs = npages;
		uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
	}
	simple_unlock(&uobj->vmobjlock);

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = min(totalbytes, eof - startoffset);
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
	    UVMPAGER_MAPIN_READ);

	s = splbio();
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = totalbytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_READ | (async ? B_CALL : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	/*
	 * if EOF is in the middle of the last page, zero the part past EOF.
	 */

	if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
		memset((void *)(kva + bytes), 0, tailbytes);
	}
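
	/*
	 * The transfer is issued as one "master" buf (mbp) covering the
	 * whole kva window, split below into child bufs, one per
	 * contiguous run of disk blocks (or mbp itself if a single i/o
	 * suffices).  Each child points back at the master via b_private,
	 * and uvm_aio_biodone1() folds the child's completion into the
	 * master, which finishes once every piece (and everything
	 * accounted via skipbytes) is done.
	 */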
	/*
	 * now loop over the pages, reading as needed.
	 */

	if (write) {
		lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL);
	} else {
		lockmgr(&vp->v_glock, LK_SHARED, NULL);
	}

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {

		/*
		 * skip pages which don't need to be read.
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		while ((pgs[pidx]->flags & PG_FAKE) == 0) {
			size_t b;

			KASSERT((offset & (PAGE_SIZE - 1)) == 0);
			b = min(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
			    offset, 0,0,0);
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o.  if bmap returns an error,
		 * skip the rest of the top-level i/o.
		 */

		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
			    lbn, error,0,0);
			skipbytes += bytes;
			goto loopdone;
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary to avoid
		 * overwriting pages with valid data.
		 */

		iobytes = min(((lbn + 1 + run) << fs_bshift) - offset, bytes);
		if (offset + iobytes > round_page(offset)) {
			pcount = 1;
			while (pidx + pcount < npages &&
			    pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = min(iobytes, (pcount << PAGE_SHIFT) -
			    (offset - trunc_page(offset)));
		}

		/*
		 * if this block isn't allocated, zero it instead of reading it.
		 * if this is a read access, mark the pages we zeroed PG_RDONLY.
		 */

		if (blkno < 0) {
			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);

			sawhole = TRUE;
			memset((char *)kva + (offset - startoffset), 0,
			    iobytes);
			skipbytes += iobytes;

			if (!write) {
				int holepages =
				    (round_page(offset + iobytes) -
				    trunc_page(offset)) >> PAGE_SHIFT;
				for (i = 0; i < holepages; i++) {
					pgs[pidx + i]->flags |= PG_RDONLY;
				}
			}
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (char *)kva + offset - startoffset;
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_READ|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - (lbn << fs_bshift)) >>
		    dev_bshift);

		UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}
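
	/*
	 * Pages that were already valid and blocks that turned out to be
	 * holes never get a child i/o, so credit those bytes against the
	 * master buf here; if the whole request was satisfied that way,
	 * complete the master ourselves.
	 */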
loopdone:
	if (skipbytes) {
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}

	if (async) {
		UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
		lockmgr(&vp->v_glock, LK_RELEASE, NULL);
		return EINPROGRESS;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	raoffset = startoffset + totalbytes;

	/*
	 * if we encountered a hole then we have to do a little more work.
	 * for read faults, we marked the page PG_RDONLY so that future
	 * write accesses to the page will fault again.
	 * for write faults, we must make sure that the backing store for
	 * the page is completely allocated while the pages are locked.
	 */

	if (error == 0 && sawhole && write) {
		error = VOP_BALLOCN(vp, startoffset, npages << PAGE_SHIFT,
		    cred, 0);
		if (error) {
			UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
			    lbn, error,0,0);
			lockmgr(&vp->v_glock, LK_RELEASE, NULL);
			simple_lock(&uobj->vmobjlock);
			goto out;
		}
	}
	lockmgr(&vp->v_glock, LK_RELEASE, NULL);
	simple_lock(&uobj->vmobjlock);

	/*
	 * see if we want to start any readahead.
	 * XXXUBC for now, just read the next 128k on 64k boundaries.
	 * this is pretty nonsensical, but it is 50% faster than reading
	 * just the next 64k.
	 */

raout:
	if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
	    PAGE_SHIFT <= 16) {
		int racount;

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);
	}

	/*
	 * we're almost done!  release the pages...
	 * for errors, we free the pages.
	 * otherwise we activate them and mark them as valid and clean.
	 * also, unbusy pages that were not actually requested.
	 */

out:
	if (error) {
		uvm_lock_pageq();
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pgs[i], pgs[i]->flags, 0,0);
			if ((pgs[i]->flags & PG_FAKE) == 0) {
				continue;
			}
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			uvm_pagefree(pgs[i]);
		}
		uvm_unlock_pageq();
		simple_unlock(&uobj->vmobjlock);
		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
		return error;
	}

	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
	for (i = 0; i < npages; i++) {
		if (pgs[i] == NULL) {
			continue;
		}
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
		    pgs[i], pgs[i]->flags, 0,0);
		if (pgs[i]->flags & PG_FAKE) {
			UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
			    pgs[i], pgs[i]->offset,0,0);
			pgs[i]->flags &= ~(PG_FAKE);
			pmap_clear_modify(pgs[i]);
			pmap_clear_reference(pgs[i]);
		}
		if (write) {
			pgs[i]->flags &= ~(PG_RDONLY);
		}
		if (i < ridx || i >= ridx + orignpages || async) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
			    pgs[i], pgs[i]->offset,0,0);
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			if (pgs[i]->wire_count == 0) {
				uvm_pageactivate(pgs[i]);
			}
			pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pgs[i], NULL);
		}
	}
	simple_unlock(&uobj->vmobjlock);
	if (ap->a_m != NULL) {
		memcpy(ap->a_m, &pgs[ridx],
		    orignpages * sizeof(struct vm_page *));
	}
	return 0;
}
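
/*
 * Illustrative sketch of how genfs_putpages() is reached (not part of
 * this file; the real caller is the vnode pager, e.g. uvn_put(), on
 * behalf of the pagedaemon or a sync): the caller has gathered a
 * contiguous run of busy, dirty pages and holds the object lock, which
 * is dropped immediately on entry below.
 *
 *	simple_lock(&vp->v_uvm.u_obj.vmobjlock);
 *	error = VOP_PUTPAGES(vp, pgs, npages, PGO_SYNCIO, rtvals);
 *
 * A PGO_SYNCIO caller waits for the i/o here; otherwise EINPROGRESS is
 * returned and completion is handled by uvm_aio_biodone().
 */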
/*
 * generic VM putpages routine.
 * Write the given range of pages to backing store.
 */

int
genfs_putpages(v)
	void *v;
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		struct vm_page **a_m;
		int a_count;
		int a_flags;
		int *a_rtvals;
	} */ *ap = v;

	int s, error, error2, npages, run;
	int fs_bshift, dev_bshift, dev_bsize;
	vaddr_t kva;
	off_t eof, offset, startoffset;
	size_t bytes, iobytes, skipbytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	struct buf *mbp, *bp;
	struct vnode *vp = ap->a_vp;
	boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0;
	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);

	simple_unlock(&vp->v_uvm.u_obj.vmobjlock);

	error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
	if (error) {
		return error;
	}

	error = error2 = 0;
	npages = ap->a_count;
	fs_bshift = vp->v_mount->mnt_fs_bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;
	KASSERT((eof & (dev_bsize - 1)) == 0);

	pg = ap->a_m[0];
	startoffset = pg->offset;
	bytes = min(npages << PAGE_SHIFT, eof - startoffset);
	skipbytes = 0;
	KASSERT(bytes != 0);

	kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK);

	s = splbio();
	vp->v_numoutput += 2;
	mbp = pool_get(&bufpool, PR_WAITOK);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	splx(s);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
	    (async ? B_CALL : 0) |
	    (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
			skipbytes += bytes;
			bytes = 0;
			break;
		}

		iobytes = min(((lbn + 1 + run) << fs_bshift) - offset, bytes);
		if (blkno == (daddr_t)-1) {
			skipbytes += iobytes;
			continue;
		}

		/* if it's really one i/o, don't make a second buf */
		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			vp->v_numoutput++;
			bp = pool_get(&bufpool, PR_WAITOK);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			splx(s);
			bp->b_data = (char *)kva +
			    (vaddr_t)(offset - pg->offset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - (lbn << fs_bshift)) >>
		    dev_bshift);
		UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    vp, offset, bp->b_bcount, bp->b_blkno);
		VOP_STRATEGY(bp);
	}
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
		s = splbio();
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	if (async) {
		UVMHIST_LOG(ubchist, "returning PEND", 0,0,0,0);
		return EINPROGRESS;
	}
	if (bp != NULL) {
		UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
		error2 = biowait(mbp);
	}
	if (bioops.io_pageiodone) {
		(*bioops.io_pageiodone)(mbp);
	}
	s = splbio();
	vwakeup(mbp);
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
	return error ? error : error2;
}

int
genfs_size(v)
	void *v;
{
	struct vop_size_args /* {
		struct vnode *a_vp;
		off_t a_size;
		off_t *a_eobp;
	} */ *ap = v;
	int bsize;

	bsize = 1 << ap->a_vp->v_mount->mnt_fs_bshift;
	*ap->a_eobp = (ap->a_size + bsize - 1) & ~(bsize - 1);
	return 0;
}