/*	$NetBSD: genfs_vnops.c,v 1.35 2001/06/14 08:22:14 chs Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/poll.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pager.h>

#ifdef NFSSERVER
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>
#endif

int
genfs_poll(v)
	void *v;
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct proc *a_p;
	} */ *ap = v;

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

int
genfs_fsync(v)
	void *v;
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		off_t offlo;
		off_t offhi;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int wait;

	wait = (ap->a_flags & FSYNC_WAIT) != 0;
	vflushbuf(vp, wait);
	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
		return (0);
	else
		return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
}

int
genfs_seek(v)
	void *v;
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		struct ucred *a_ucred;
	} */ *ap = v;

	if (ap->a_newoff < 0)
		return (EINVAL);

	return (0);
}

int
genfs_abortop(v)
	void *v;
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
		PNBUF_PUT(ap->a_cnp->cn_pnbuf);
	return (0);
}

int
genfs_fcntl(v)
	void *v;
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;

	if (ap->a_command == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_badop(v)
	void *v;
{

	panic("genfs: bad op");
}

/*ARGSUSED*/
int
genfs_nullop(v)
	void *v;
{

	return (0);
}

/*ARGSUSED*/
int
genfs_einval(v)
	void *v;
{

	return (EINVAL);
}

/*ARGSUSED*/
int
genfs_eopnotsupp(v)
	void *v;
{

	return (EOPNOTSUPP);
}

/*
 * Called when an fs doesn't support a particular vop but the vop needs to
 * vrele, vput, or vunlock passed in vnodes.
 */
int
genfs_eopnotsupp_rele(v)
	void *v;
{
	struct vop_generic_args /*
		struct vnodeop_desc *a_desc;
		/ * other random data follows, presumably * /
	} */ *ap = v;
	struct vnodeop_desc *desc = ap->a_desc;
	struct vnode *vp;
	int flags, i, j, offset;

	flags = desc->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; flags >>= 1, i++) {
		if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
			break;	/* stop at end of list */
		if ((j = flags & VDESC_VP0_WILLPUT)) {
			vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
			switch (j) {
			case VDESC_VP0_WILLPUT:
				vput(vp);
				break;
			case VDESC_VP0_WILLUNLOCK:
				VOP_UNLOCK(vp, 0);
				break;
			case VDESC_VP0_WILLRELE:
				vrele(vp);
				break;
			}
		}
	}

	return (EOPNOTSUPP);
}
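
/*
 * Illustrative sketch (not part of the original file): filesystems normally
 * use these genfs_* routines by pointing entries of their vnode operation
 * vectors at them rather than supplying their own implementations.  The
 * "examplefs_" name below is hypothetical; the vop_*_desc descriptors,
 * vn_default_error, and the genfs_* functions are the real symbols.
 * A filesystem that does not support hard links, for instance, might point
 * vop_link at genfs_eopnotsupp_rele so the passed-in vnodes still get
 * released correctly.
 */
#if 0
const struct vnodeopv_entry_desc examplefs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_poll_desc, genfs_poll },			/* poll */
	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
	{ &vop_seek_desc, genfs_seek },			/* seek */
	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
	{ &vop_lock_desc, genfs_lock },			/* lock */
	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
	{ &vop_link_desc, genfs_eopnotsupp_rele },	/* link (unsupported) */
	{ NULL, NULL }
};
#endif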

/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{

	return (EBADF);
}

/* ARGSUSED */
int
genfs_enoioctl(v)
	void *v;
{

	return (ENOTTY);
}


/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
genfs_revoke(v)
	void *v;
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("genfs_revoke: not revokeall");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Lock the node.
 */
int
genfs_lock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
}

/*
 * Unlock the node.
 */
int
genfs_unlock(v)
	void *v;
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
	    &vp->v_interlock));
}

/*
 * Return whether or not the node is locked.
 */
int
genfs_islocked(v)
	void *v;
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockstatus(&vp->v_lock));
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 */
int
genfs_nolock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
}

int
genfs_nounlock(v)
	void *v;
{
	return (0);
}

int
genfs_noislocked(v)
	void *v;
{
	return (0);
}

/*
 * Local lease check for NFS servers.  Just set up args and let
 * nqsrv_getlease() do the rest.  If NFSSERVER is not in the kernel,
 * this is a null operation.
 */
int
genfs_lease_check(v)
	void *v;
{
#ifdef NFSSERVER
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap = v;
	u_int32_t duration = 0;
	int cache;
	u_quad_t frev;

	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
	return (0);
#else
	return (0);
#endif /* NFSSERVER */
}

int
genfs_mmap(v)
	void *v;
{
	return 0;
}

/*
 * generic VM getpages routine.
 * Return PG_BUSY pages for the given range,
 * reading from backing store if necessary.
 */

int
genfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t newsize, diskeof, memeof;
	off_t offset, origoffset, startoffset, endoffset, raoffset;
	daddr_t lbn, blkno;
	int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
	int fs_bshift, fs_bsize, dev_bshift, dev_bsize;
	int flags = ap->a_flags;
	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
	vaddr_t kva;
	struct buf *bp, *mbp;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct vm_page *pgs[16];			/* XXXUBC 16 */
	struct ucred *cred = curproc->p_ucred;		/* XXXUBC curproc */
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	boolean_t sawhole = FALSE;
	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
	    vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);

	/* XXXUBC temp limit */
	if (*ap->a_count > 16) {
		return EINVAL;
	}

	error = 0;
	origoffset = ap->a_offset;
	orignpages = *ap->a_count;
	error = VOP_SIZE(vp, vp->v_uvm.u_size, &diskeof);
	if (error) {
		return error;
	}
	if (flags & PGO_PASTEOF) {
		newsize = MAX(vp->v_uvm.u_size,
		    origoffset + (orignpages << PAGE_SHIFT));
		error = VOP_SIZE(vp, newsize, &memeof);
		if (error) {
			return error;
		}
	} else {
		memeof = diskeof;
	}
	KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
	KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
	KASSERT(orignpages > 0);

	/*
	 * Bounds-check the request.
	 */

	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
		if ((flags & PGO_LOCKED) == 0) {
			simple_unlock(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
		    origoffset, *ap->a_count, memeof,0);
		return EINVAL;
	}

	/*
	 * For PGO_LOCKED requests, just return whatever's in memory.
	 */

	if (flags & PGO_LOCKED) {
		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
		    UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);

		return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
	}

	/* vnode is VOP_LOCKed, uobj is locked */

	if (write && (vp->v_flag & VONWORKLST) == 0) {
		vn_syncer_add_to_worklist(vp, filedelay);
	}

	/*
	 * find the requested pages and make some simple checks.
	 * leave space in the page array for a whole block.
	 */

	fs_bshift = vp->v_mount->mnt_fs_bshift;
	fs_bsize = 1 << fs_bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;
	KASSERT((diskeof & (dev_bsize - 1)) == 0);
	KASSERT((memeof & (dev_bsize - 1)) == 0);

	orignpages = MIN(orignpages,
	    round_page(memeof - origoffset) >> PAGE_SHIFT);
	npages = orignpages;
	startoffset = origoffset & ~(fs_bsize - 1);
	endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
	    + fs_bsize - 1) & ~(fs_bsize - 1));
	endoffset = MIN(endoffset, round_page(memeof));
	ridx = (origoffset - startoffset) >> PAGE_SHIFT;

	memset(pgs, 0, sizeof(pgs));
	uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);

	/*
	 * if PGO_OVERWRITE is set, don't bother reading the pages.
	 * PGO_OVERWRITE also means that the caller guarantees
	 * that the pages already have backing store allocated.
	 */

	if (flags & PGO_OVERWRITE) {
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				uvm_pagezero(pg);
				pg->flags &= ~(PG_FAKE);
			}
			pg->flags &= ~(PG_RDONLY);
		}
		npages += ridx;
		goto out;
	}

	/*
	 * if the pages are already resident, just return them.
	 */

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[ridx + i];

		if ((pg->flags & PG_FAKE) ||
		    (write && (pg->flags & PG_RDONLY))) {
			break;
		}
	}
	if (i == npages) {
		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
		raoffset = origoffset + (orignpages << PAGE_SHIFT);
		npages += ridx;
		goto raout;
	}

	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * find any additional pages needed to cover the expanded range.
	 */

	npages = (endoffset - startoffset) >> PAGE_SHIFT;
	if (startoffset != origoffset || npages != orignpages) {

		/*
		 * XXXUBC we need to avoid deadlocks caused by locking
		 * additional pages at lower offsets than pages we
		 * already have locked.  for now, unlock them all and
		 * start over.
		 */

		for (i = 0; i < orignpages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				pg->flags |= PG_RELEASED;
			}
		}
		uvm_page_unbusy(&pgs[ridx], orignpages);
		memset(pgs, 0, sizeof(pgs));

		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
		    startoffset, endoffset, 0,0);
		npgs = npages;
		uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
	}
	simple_unlock(&uobj->vmobjlock);

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
	    UVMPAGER_MAPIN_READ);

	s = splbio();
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = totalbytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	/*
	 * if EOF is in the middle of the range, zero the part past EOF.
	 */

	if (tailbytes > 0) {
		memset((void *)(kva + bytes), 0, tailbytes);
	}

	/*
	 * now loop over the pages, reading as needed.
	 */

	if (write) {
		lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL);
	} else {
		lockmgr(&vp->v_glock, LK_SHARED, NULL);
	}

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {

		/*
		 * skip pages which don't need to be read.
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) {
			size_t b;

			KASSERT((offset & (PAGE_SIZE - 1)) == 0);
			b = MIN(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
			    offset, 0,0,0);
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o.  if bmap returns an error,
		 * skip the rest of the top-level i/o.
		 */

		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
			    lbn, error,0,0);
			skipbytes += bytes;
			goto loopdone;
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary to avoid
		 * overwriting pages with valid data.
		 */

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (offset + iobytes > round_page(offset)) {
			pcount = 1;
			while (pidx + pcount < npages &&
			    pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
			    (offset - trunc_page(offset)));
		}

		/*
		 * if this block isn't allocated, zero it instead of reading it.
		 * if this is a read access, mark the pages we zeroed PG_RDONLY.
		 */

		if (blkno < 0) {
			int holepages = (round_page(offset + iobytes) -
			    trunc_page(offset)) >> PAGE_SHIFT;
			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);

			sawhole = TRUE;
			memset((char *)kva + (offset - startoffset), 0,
			    iobytes);
			skipbytes += iobytes;

			for (i = 0; i < holepages; i++) {
				if (write) {
					pgs[pidx + i]->flags &= ~PG_CLEAN;
				} else {
					pgs[pidx + i]->flags |= PG_RDONLY;
				}
			}
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (char *)kva + offset - startoffset;
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_READ|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);

		UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}

loopdone:
	if (skipbytes) {
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}

	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
		lockmgr(&vp->v_glock, LK_RELEASE, NULL);
		return 0;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	raoffset = startoffset + totalbytes;

	/*
	 * if we encountered a hole then we have to do a little more work.
	 * for read faults, we marked the page PG_RDONLY so that future
	 * write accesses to the page will fault again.
	 * for write faults, we must make sure that the backing store for
	 * the page is completely allocated while the pages are locked.
	 */

	if (error == 0 && sawhole && write) {
		error = VOP_BALLOCN(vp, startoffset, npages << PAGE_SHIFT,
		    cred, 0);
		if (error) {
			UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
			    lbn, error,0,0);
			lockmgr(&vp->v_glock, LK_RELEASE, NULL);
			simple_lock(&uobj->vmobjlock);
			goto out;
		}
	}
	lockmgr(&vp->v_glock, LK_RELEASE, NULL);
	simple_lock(&uobj->vmobjlock);

	/*
	 * see if we want to start any readahead.
	 * XXXUBC for now, just read the next 128k on 64k boundaries.
	 * this is pretty nonsensical, but it is 50% faster than reading
	 * just the next 64k.
	 */

raout:
	if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
	    PAGE_SHIFT <= 16) {
		int racount;

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);
	}

	/*
	 * we're almost done!  release the pages...
	 * for errors, we free the pages.
	 * otherwise we activate them and mark them as valid and clean.
	 * also, unbusy pages that were not actually requested.
	 */

out:
	if (error) {
		uvm_lock_pageq();
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pgs[i], pgs[i]->flags, 0,0);
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			if (pgs[i]->flags & PG_RELEASED) {
				uvm_unlock_pageq();
				(uobj->pgops->pgo_releasepg)(pgs[i], NULL);
				uvm_lock_pageq();
				continue;
			}
			if (pgs[i]->flags & PG_FAKE) {
				uvm_pagefree(pgs[i]);
				continue;
			}
			uvm_pageactivate(pgs[i]);
			pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pgs[i], NULL);
		}
		uvm_unlock_pageq();
		simple_unlock(&uobj->vmobjlock);
		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
		return error;
	}

	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
	uvm_lock_pageq();
	for (i = 0; i < npages; i++) {
		if (pgs[i] == NULL) {
			continue;
		}
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
		    pgs[i], pgs[i]->flags, 0,0);
		if (pgs[i]->flags & PG_FAKE) {
			UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
			    pgs[i], pgs[i]->offset,0,0);
			pgs[i]->flags &= ~(PG_FAKE);
			pmap_clear_modify(pgs[i]);
			pmap_clear_reference(pgs[i]);
		}
		if (write) {
			pgs[i]->flags &= ~(PG_RDONLY);
		}
		if (i < ridx || i >= ridx + orignpages || async) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
			    pgs[i], pgs[i]->offset,0,0);
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			if (pgs[i]->flags & PG_RELEASED) {
				uvm_unlock_pageq();
				(uobj->pgops->pgo_releasepg)(pgs[i], NULL);
				uvm_lock_pageq();
				continue;
			}
			uvm_pageactivate(pgs[i]);
			pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pgs[i], NULL);
		}
	}
	uvm_unlock_pageq();
	simple_unlock(&uobj->vmobjlock);
	if (ap->a_m != NULL) {
		memcpy(ap->a_m, &pgs[ridx],
		    orignpages * sizeof(struct vm_page *));
	}
	return 0;
}
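
/*
 * Illustrative sketch (not part of the original file): a typical synchronous
 * caller of the getpages routine above.  The caller enters with the vnode
 * VOP_LOCKed and the uvm_object locked, asks for the pages with PGO_SYNCIO,
 * and on success gets back PG_BUSY pages with the object unlocked.  The
 * local variable names here are hypothetical.
 */
#if 0
	struct vm_page *pgs[1];
	int npages = 1;
	off_t off = 0;		/* page-aligned file offset to read */
	int error;

	memset(pgs, 0, sizeof(pgs));
	/* vp is assumed to be VOP_LOCKed here, as genfs_getpages() expects */
	simple_lock(&vp->v_uvm.u_obj.vmobjlock);
	error = VOP_GETPAGES(vp, off, pgs, &npages, 0,
	    VM_PROT_READ, 0, PGO_SYNCIO);
	/* on return the object is unlocked; on success pgs[0] is PG_BUSY */
#endif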

/*
 * generic VM putpages routine.
 * Write the given range of pages to backing store.
 */

int
genfs_putpages(v)
	void *v;
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		struct vm_page **a_m;
		int a_count;
		int a_flags;
		int *a_rtvals;
	} */ *ap = v;

	int s, error, npages, run;
	int fs_bshift, dev_bshift, dev_bsize;
	vaddr_t kva;
	off_t eof, offset, startoffset;
	size_t bytes, iobytes, skipbytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	struct buf *mbp, *bp;
	struct vnode *vp = ap->a_vp;
	boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0;
	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p offset 0x%x count %d",
	    vp, ap->a_m[0]->offset, ap->a_count, 0);

	simple_unlock(&vp->v_uvm.u_obj.vmobjlock);

	error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
	if (error) {
		return error;
	}

	error = 0;
	npages = ap->a_count;
	fs_bshift = vp->v_mount->mnt_fs_bshift;
	dev_bshift = vp->v_mount->mnt_dev_bshift;
	dev_bsize = 1 << dev_bshift;
	KASSERT((eof & (dev_bsize - 1)) == 0);

	pg = ap->a_m[0];
	startoffset = pg->offset;
	bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
	skipbytes = 0;
	KASSERT(bytes != 0);

	kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK);

	s = splbio();
	vp->v_numoutput += 2;
	mbp = pool_get(&bufpool, PR_WAITOK);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	splx(s);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
	    (async ? B_CALL : 0) |
	    (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
			skipbytes += bytes;
			bytes = 0;
			break;
		}

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (blkno == (daddr_t)-1) {
			skipbytes += iobytes;
			continue;
		}

		/* if it's really one i/o, don't make a second buf */
		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			vp->v_numoutput++;
			bp = pool_get(&bufpool, PR_WAITOK);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			splx(s);
			bp->b_data = (char *)kva +
			    (vaddr_t)(offset - pg->offset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);
		UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    vp, offset, bp->b_bcount, bp->b_blkno);
		VOP_STRATEGY(bp);
	}
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
		s = splbio();
		mbp->b_resid -= skipbytes;
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
		return 0;
	}
	if (bp != NULL) {
		UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
		error = biowait(mbp);
	}
	if (bioops.io_pageiodone) {
		(*bioops.io_pageiodone)(mbp);
	}
	s = splbio();
	vwakeup(mbp);
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
	return error;
}

int
genfs_size(v)
	void *v;
{
	struct vop_size_args /* {
		struct vnode *a_vp;
		off_t a_size;
		off_t *a_eobp;
	} */ *ap = v;
	int bsize;

	bsize = 1 << ap->a_vp->v_mount->mnt_fs_bshift;
	*ap->a_eobp = (ap->a_size + bsize - 1) & ~(bsize - 1);
	return 0;
}
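
/*
 * Illustrative note (not part of the original file): genfs_size() rounds
 * a byte size up to a filesystem block boundary.  For example, on a
 * filesystem with 8KB blocks (mnt_fs_bshift == 13, so bsize == 8192),
 * a size of 12000 yields an end-of-block offset of 16384, and a size
 * that is already block-aligned, such as 16384, is returned unchanged.
 */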