/*	$NetBSD: genfs_vnops.c,v 1.39 2001/10/03 14:13:08 enami Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include "opt_nfsserver.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/mman.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pager.h>

#ifdef NFSSERVER
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nqnfs.h>
#include <nfs/nfs_var.h>
#endif

int
genfs_poll(v)
	void *v;
{
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct proc *a_p;
	} */ *ap = v;

	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

int
genfs_fsync(v)
	void *v;
{
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_flags;
		off_t offlo;
		off_t offhi;
		struct proc *a_p;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	int wait;

	wait = (ap->a_flags & FSYNC_WAIT) != 0;
	vflushbuf(vp, wait);
	if ((ap->a_flags & FSYNC_DATAONLY) != 0)
		return (0);
	else
		return (VOP_UPDATE(vp, NULL, NULL, wait ? UPDATE_WAIT : 0));
}

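/*
 * Seek: nothing to do except validate the requested offset.
 */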
int
genfs_seek(v)
	void *v;
{
	struct vop_seek_args /* {
		struct vnode *a_vp;
		off_t a_oldoff;
		off_t a_newoff;
		struct ucred *a_ucred;
	} */ *ap = v;

	if (ap->a_newoff < 0)
		return (EINVAL);

	return (0);
}

int
genfs_abortop(v)
	void *v;
{
	struct vop_abortop_args /* {
		struct vnode *a_dvp;
		struct componentname *a_cnp;
	} */ *ap = v;

	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
		PNBUF_PUT(ap->a_cnp->cn_pnbuf);
	return (0);
}

int
genfs_fcntl(v)
	void *v;
{
	struct vop_fcntl_args /* {
		struct vnode *a_vp;
		u_int a_command;
		caddr_t a_data;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;

	if (ap->a_command == F_SETFL)
		return (0);
	else
		return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_badop(v)
	void *v;
{

	panic("genfs: bad op");
}

/*ARGSUSED*/
int
genfs_nullop(v)
	void *v;
{

	return (0);
}

/*ARGSUSED*/
int
genfs_einval(v)
	void *v;
{

	return (EINVAL);
}

/*ARGSUSED*/
int
genfs_eopnotsupp(v)
	void *v;
{

	return (EOPNOTSUPP);
}

/*
 * Called when an fs doesn't support a particular vop but the vop needs to
 * vrele, vput, or vunlock passed in vnodes.
 */
int
genfs_eopnotsupp_rele(v)
	void *v;
{
	struct vop_generic_args /*
		struct vnodeop_desc *a_desc;
		/ * other random data follows, presumably * /
	} */ *ap = v;
	struct vnodeop_desc *desc = ap->a_desc;
	struct vnode *vp;
	int flags, i, j, offset;

	flags = desc->vdesc_flags;
	for (i = 0; i < VDESC_MAX_VPS; flags >>= 1, i++) {
		if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET)
			break;	/* stop at end of list */
		if ((j = flags & VDESC_VP0_WILLPUT)) {
			vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
			switch (j) {
			case VDESC_VP0_WILLPUT:
				vput(vp);
				break;
			case VDESC_VP0_WILLUNLOCK:
				VOP_UNLOCK(vp, 0);
				break;
			case VDESC_VP0_WILLRELE:
				vrele(vp);
				break;
			}
		}
	}

	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
genfs_ebadf(v)
	void *v;
{

	return (EBADF);
}

/* ARGSUSED */
int
genfs_enoioctl(v)
	void *v;
{

	return (ENOTTY);
}


/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
genfs_revoke(v)
	void *v;
{
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("genfs_revoke: not revokeall");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Lock the node.
 */
int
genfs_lock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock));
}

/*
 * Unlock the node.
 */
int
genfs_unlock(v)
	void *v;
{
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE,
	    &vp->v_interlock));
}

/*
 * Return whether or not the node is locked.
 */
int
genfs_islocked(v)
	void *v;
{
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;

	return (lockstatus(&vp->v_lock));
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 */
int
genfs_nolock(v)
	void *v;
{
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap = v;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
}

int
genfs_nounlock(v)
	void *v;
{
	return (0);
}

int
genfs_noislocked(v)
	void *v;
{
	return (0);
}

/*
 * Local lease check for NFS servers.  Just set up args and let
 * nqsrv_getlease() do the rest.  If NFSSERVER is not in the kernel,
 * this is a null operation.
 */
int
genfs_lease_check(v)
	void *v;
{
#ifdef NFSSERVER
	struct vop_lease_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
		struct ucred *a_cred;
		int a_flag;
	} */ *ap = v;
	u_int32_t duration = 0;
	int cache;
	u_quad_t frev;

	(void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag,
	    NQLOCALSLP, ap->a_p, (struct mbuf *)0, &cache, &frev, ap->a_cred);
	return (0);
#else
	return (0);
#endif /* NFSSERVER */
}

int
genfs_mmap(v)
	void *v;
{
	return 0;
}

/*
 * generic VM getpages routine.
 * Return PG_BUSY pages for the given range,
 * reading from backing store if necessary.
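 * The object must be locked on entry; except for PGO_LOCKED requests
 * it is unlocked while i/o is in progress and on return.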
 */

int
genfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t newsize, diskeof, memeof;
	off_t offset, origoffset, startoffset, endoffset, raoffset;
	daddr_t lbn, blkno;
	int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
	int fs_bshift, fs_bsize, dev_bshift;
	int flags = ap->a_flags;
	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
	vaddr_t kva;
	struct buf *bp, *mbp;
	struct vnode *vp = ap->a_vp;
	struct vnode *devvp;
	struct genfs_node *gp = VTOG(vp);
	struct uvm_object *uobj = &vp->v_uobj;
	struct vm_page *pg, *pgs[16];		/* XXXUBC 16 */
	struct ucred *cred = curproc->p_ucred;	/* XXXUBC curproc */
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	boolean_t sawhole = FALSE;
	boolean_t overwrite = (flags & PGO_OVERWRITE) != 0;
	UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
	    vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);

	/* XXXUBC temp limit */
	if (*ap->a_count > 16) {
		panic("genfs_getpages: too many pages");
	}

	error = 0;
	origoffset = ap->a_offset;
	orignpages = *ap->a_count;
	GOP_SIZE(vp, vp->v_size, &diskeof);
	if (flags & PGO_PASTEOF) {
		newsize = MAX(vp->v_size,
		    origoffset + (orignpages << PAGE_SHIFT));
		GOP_SIZE(vp, newsize, &memeof);
	} else {
		memeof = diskeof;
	}
	KASSERT(ap->a_centeridx >= 0 && ap->a_centeridx <= orignpages);
	KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
	KASSERT(orignpages > 0);

	/*
	 * Bounds-check the request.
	 */

	if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
		if ((flags & PGO_LOCKED) == 0) {
			simple_unlock(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
		    origoffset, *ap->a_count, memeof,0);
		return EINVAL;
	}

	/*
	 * For PGO_LOCKED requests, just return whatever's in memory.
	 */

	if (flags & PGO_LOCKED) {
		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
		    UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);

		return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
	}

	/* vnode is VOP_LOCKed, uobj is locked */

	if (write && (vp->v_flag & VONWORKLST) == 0) {
		vn_syncer_add_to_worklist(vp, filedelay);
	}

	/*
	 * find the requested pages and make some simple checks.
	 * leave space in the page array for a whole block.
	 */

	if (vp->v_type == VREG) {
		fs_bshift = vp->v_mount->mnt_fs_bshift;
		dev_bshift = vp->v_mount->mnt_dev_bshift;
	} else {
		fs_bshift = DEV_BSHIFT;
		dev_bshift = DEV_BSHIFT;
	}
	fs_bsize = 1 << fs_bshift;

	orignpages = MIN(orignpages,
	    round_page(memeof - origoffset) >> PAGE_SHIFT);
	npages = orignpages;
	startoffset = origoffset & ~(fs_bsize - 1);
	endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
	    + fs_bsize - 1) & ~(fs_bsize - 1));
	endoffset = MIN(endoffset, round_page(memeof));
	ridx = (origoffset - startoffset) >> PAGE_SHIFT;

	memset(pgs, 0, sizeof(pgs));
	uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);

	/*
	 * if the pages are already resident, just return them.
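	 * (a PG_FAKE page has no valid data yet, and for a write fault a
	 * PG_RDONLY page marks a hole whose backing store still needs to
	 * be allocated, so either one sends us into the i/o path below.)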
	 */

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[ridx + i];

		if ((pg->flags & PG_FAKE) ||
		    (write && (pg->flags & PG_RDONLY))) {
			break;
		}
	}
	if (i == npages) {
		UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
		raoffset = origoffset + (orignpages << PAGE_SHIFT);
		npages += ridx;
		goto raout;
	}

	/*
	 * if PGO_OVERWRITE is set, don't bother reading the pages.
	 */

	if (flags & PGO_OVERWRITE) {
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);

		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			pg->flags &= ~(PG_RDONLY|PG_CLEAN);
		}
		npages += ridx;
		goto out;
	}

	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * find any additional pages needed to cover the expanded range.
	 */

	npages = (endoffset - startoffset) >> PAGE_SHIFT;
	if (startoffset != origoffset || npages != orignpages) {

		/*
		 * we need to avoid deadlocks caused by locking
		 * additional pages at lower offsets than pages we
		 * already have locked.  unlock them all and start over.
		 */

		for (i = 0; i < orignpages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				pg->flags |= PG_RELEASED;
			}
		}
		uvm_page_unbusy(&pgs[ridx], orignpages);
		memset(pgs, 0, sizeof(pgs));

		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
		    startoffset, endoffset, 0,0);
		npgs = npages;
		uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
	}
	simple_unlock(&uobj->vmobjlock);

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
	    UVMPAGER_MAPIN_READ);

	s = splbio();
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = totalbytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
	mbp->b_iodone = (async ? uvm_aio_biodone : 0);
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	/*
	 * if EOF is in the middle of the range, zero the part past EOF.
	 * if the page including EOF is not PG_FAKE, skip over it since
	 * in that case it has valid data that we need to preserve.
	 */

	if (tailbytes > 0) {
		size_t tailstart = bytes;

		if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) {
			tailstart = round_page(tailstart);
			tailbytes -= tailstart - bytes;
		}
		UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x",
		    kva, tailstart, tailbytes,0);
		memset((void *)(kva + tailstart), 0, tailbytes);
	}

	/*
	 * now loop over the pages, reading as needed.
	 */

	if (write) {
		lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
	} else {
		lockmgr(&gp->g_glock, LK_SHARED, NULL);
	}

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {

		/*
		 * skip pages which don't need to be read.
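		 * (pages that are neither PG_FAKE nor PG_RDONLY already
		 * have valid data.)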
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		while ((pgs[pidx]->flags & (PG_FAKE|PG_RDONLY)) == 0) {
			size_t b;

			KASSERT((offset & (PAGE_SIZE - 1)) == 0);
			b = MIN(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
			    offset, 0,0,0);
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * bmap the file to find out the blkno to read from and
		 * how much we can read in one i/o.  if bmap returns an error,
		 * skip the rest of the top-level i/o.
		 */

		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
			    lbn, error,0,0);
			skipbytes += bytes;
			goto loopdone;
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary to avoid
		 * overwriting pages with valid data.
		 */

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (offset + iobytes > round_page(offset)) {
			pcount = 1;
			while (pidx + pcount < npages &&
			    pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
			    (offset - trunc_page(offset)));
		}

		/*
		 * if this block isn't allocated, zero it instead of reading it.
		 * if this is a read access, mark the pages we zeroed PG_RDONLY.
		 */

		if (blkno < 0) {
			int holepages = (round_page(offset + iobytes) -
			    trunc_page(offset)) >> PAGE_SHIFT;
			UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);

			sawhole = TRUE;
			memset((char *)kva + (offset - startoffset), 0,
			    iobytes);
			skipbytes += iobytes;

			for (i = 0; i < holepages; i++) {
				if (write) {
					pgs[pidx + i]->flags &= ~PG_CLEAN;
				} else {
					pgs[pidx + i]->flags |= PG_RDONLY;
				}
			}
			continue;
		}

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (char *)kva + offset - startoffset;
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_READ|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			bp->b_proc = NULL;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;
		if (devvp->v_type == VBLK) {
			bp->b_dev = devvp->v_rdev;
		}

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);

		UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}

loopdone:
	if (skipbytes) {
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}

	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0);
		lockmgr(&gp->g_glock, LK_RELEASE, NULL);
		return 0;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);
	raoffset = startoffset + totalbytes;

	/*
	 * if we encountered a hole then we have to do a little more work.
	 * for read faults, we marked the page PG_RDONLY so that future
	 * write accesses to the page will fault again.
	 * for write faults, we must make sure that the backing store for
	 * the page is completely allocated while the pages are locked.
	 */

	if (!error && sawhole && write) {
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			pgs[i]->flags &= ~PG_CLEAN;
			UVMHIST_LOG(ubchist, "mark dirty pg %p", pgs[i],0,0,0);
		}
		error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0,
		    cred);
		UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d",
		    startoffset, npages << PAGE_SHIFT, error,0);
	}
	lockmgr(&gp->g_glock, LK_RELEASE, NULL);
	simple_lock(&uobj->vmobjlock);

	/*
	 * see if we want to start any readahead.
	 * XXXUBC for now, just read the next 128k on 64k boundaries.
	 * this is pretty nonsensical, but it is 50% faster than reading
	 * just the next 64k.
	 */

raout:
	if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
	    PAGE_SHIFT <= 16) {
		int racount;

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);

		racount = 1 << (16 - PAGE_SHIFT);
		(void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);
	}

	/*
	 * we're almost done!  release the pages...
	 * for errors, we free the pages.
	 * otherwise we activate them and mark them as valid and clean.
	 * also, unbusy pages that were not actually requested.
	 */

	if (error) {
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pgs[i], pgs[i]->flags, 0,0);
			if (pgs[i]->flags & PG_FAKE) {
				pgs[i]->flags |= PG_RELEASED;
			}
		}
		uvm_lock_pageq();
		uvm_page_unbusy(pgs, npages);
		uvm_unlock_pageq();
		simple_unlock(&uobj->vmobjlock);
		UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
		return error;
	}

out:
	UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
	uvm_lock_pageq();
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		if (pg == NULL) {
			continue;
		}
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
		    pg, pg->flags, 0,0);
		if (pg->flags & PG_FAKE && !overwrite) {
			pg->flags &= ~(PG_FAKE);
			pmap_clear_modify(pgs[i]);
		}
		if (write) {
			pg->flags &= ~(PG_RDONLY);
		}
		if (i < ridx || i >= ridx + orignpages || async) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
			    pg, pg->offset,0,0);
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (pg->flags & PG_FAKE) {
				KASSERT(overwrite);
				uvm_pagezero(pg);
			}
			if (pg->flags & PG_RELEASED) {
				uvm_pagefree(pg);
				continue;
			}
			uvm_pageactivate(pg);
			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(pg, NULL);
		}
	}
	uvm_unlock_pageq();
	simple_unlock(&uobj->vmobjlock);
	if (ap->a_m != NULL) {
		memcpy(ap->a_m, &pgs[ridx],
		    orignpages * sizeof(struct vm_page *));
	}
	return 0;
}

/*
 * generic VM putpages routine.
 * Write the given range of pages to backing store.
 *
 * => "offhi == 0" means flush all pages at or after "offlo".
 * => object should be locked by caller.  we may _unlock_ the object
 *	if (and only if) we need to clean a page (PGO_CLEANIT), or
 *	if PGO_SYNCIO is set and there are pages busy.
 *	we return with the object locked.
 * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
 *	thus, a caller might want to unlock higher level resources
 *	(e.g. vm_map) before calling flush.
 * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
 *	unlock the object nor block.
 * => if PGO_ALLPAGES is set, then all pages in the object will be processed.
 * => NOTE: we rely on the fact that the object's memq is a TAILQ and
 *	that new pages are inserted on the tail end of the list.  thus,
 *	we can make a complete pass through the object in one go by starting
 *	at the head and working towards the tail (new pages are put in
 *	front of us).
 * => NOTE: we are allowed to lock the page queues, so the caller
 *	must not be holding the page queue lock.
 *
 * note on "cleaning" object and PG_BUSY pages:
 *	this routine is holding the lock on the object.  the only time
 *	that it can run into a PG_BUSY page that it does not own is if
 *	some other process has started I/O on the page (e.g. either
 *	a pagein, or a pageout).  if the PG_BUSY page is being paged
 *	in, then it can not be dirty (!PG_CLEAN) because no one has
 *	had a chance to modify it yet.  if the PG_BUSY page is being
 *	paged out then it means that someone else has already started
 *	cleaning the page for us (how nice!).  in this case, if we
 *	have syncio specified, then after we make our pass through the
 *	object we need to wait for the other PG_BUSY pages to clear
 *	off (i.e. we need to do an iosync).  also note that once a
 *	page is PG_BUSY it must stay in its object until it is un-busyed.
 *
 * note on page traversal:
 *	we can traverse the pages in an object either by going down the
 *	linked list in "uobj->memq", or we can go over the address range
 *	by page doing hash table lookups for each address.  depending
 *	on how many pages are in the object it may be cheaper to do one
 *	or the other.  we set "by_list" to true if we are using memq.
 *	if the cost of a hash lookup was equal to the cost of the list
 *	traversal we could compare the number of pages in the start->stop
 *	range to the total number of pages in the object.  however, it
 *	seems that a hash table lookup is more expensive than the linked
 *	list traversal, so we multiply the number of pages in the
 *	range by an estimate of the relatively higher cost of the hash lookup.
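 *	(i.e. we traverse by list only when the object has no more resident
 *	pages than UVM_PAGE_HASH_PENALTY times the number of pages in the
 *	requested range.)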
 */

int
genfs_putpages(v)
	void *v;
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		voff_t a_offlo;
		voff_t a_offhi;
		int a_flags;
	} */ *ap = v;
	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uobj;
	off_t startoff = ap->a_offlo;
	off_t endoff = ap->a_offhi;
	off_t off;
	int flags = ap->a_flags;
	int n = MAXBSIZE >> PAGE_SHIFT;
	int i, s, error, npages, nback;
	int freeflag;
	struct vm_page *pgs[n], *pg, *nextpg, *tpg, curmp, endmp;
	boolean_t wasclean, by_list, needs_clean;
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);

	KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
	KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0);
	KASSERT(startoff < endoff || endoff == 0);

	UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x",
	    vp, uobj->uo_npages, startoff, endoff - startoff);
	if (uobj->uo_npages == 0) {
		if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
		    (vp->v_flag & VONWORKLST)) {
			vp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(vp, v_synclist);
		}
		simple_unlock(&uobj->vmobjlock);
		return 0;
	}

	/*
	 * the vnode has pages, set up to process the request.
	 */

	error = 0;
	wasclean = TRUE;
	off = startoff;
	if (endoff == 0 || flags & PGO_ALLPAGES) {
		endoff = trunc_page(LLONG_MAX);
	}
	by_list = (uobj->uo_npages <=
	    ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);

	/*
	 * start the loop.  when scanning by list, hold the last page
	 * in the list before we start.  pages allocated after we start
	 * will be added to the end of the list, so we can stop at the
	 * current last page.
	 */

	freeflag = (curproc == uvm.pagedaemon_proc) ? PG_PAGEOUT : PG_RELEASED;
	curmp.uobject = uobj;
	curmp.offset = (voff_t)-1;
	curmp.flags = PG_BUSY;
	endmp.uobject = uobj;
	endmp.offset = (voff_t)-1;
	endmp.flags = PG_BUSY;
	if (by_list) {
		pg = TAILQ_FIRST(&uobj->memq);
		TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
		PHOLD(curproc);
	} else {
		pg = uvm_pagelookup(uobj, off);
	}
	nextpg = NULL;
	while (by_list || off < endoff) {

		/*
		 * if the current page is not interesting, move on to the next.
		 */

		KASSERT(pg == NULL || pg->uobject == uobj);
		KASSERT(pg == NULL ||
		    (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 ||
		    (pg->flags & PG_BUSY) != 0);
		if (by_list) {
			if (pg == &endmp) {
				break;
			}
			if (pg->offset < startoff || pg->offset >= endoff ||
			    pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
				pg = TAILQ_NEXT(pg, listq);
				continue;
			}
			off = pg->offset;
		} else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) {
			off += PAGE_SIZE;
			if (off < endoff) {
				pg = uvm_pagelookup(uobj, off);
			}
			continue;
		}

		/*
		 * if the current page needs to be cleaned and it's busy,
		 * wait for it to become unbusy.
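		 * (when traversing by list, the marker page "curmp" holds
		 * our place in the list while we sleep.)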
		 */

		if (flags & PGO_FREE) {
			pmap_page_protect(pg, VM_PROT_NONE);
		}
		if (flags & PGO_CLEANIT) {
			needs_clean = pmap_clear_modify(pg) ||
			    (pg->flags & PG_CLEAN) == 0;
			pg->flags |= PG_CLEAN;
		} else {
			needs_clean = FALSE;
		}
		if (needs_clean && pg->flags & PG_BUSY) {
			KASSERT(curproc != uvm.pagedaemon_proc);
			UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0);
			if (by_list) {
				TAILQ_INSERT_BEFORE(pg, &curmp, listq);
				UVMHIST_LOG(ubchist, "curmp next %p",
				    TAILQ_NEXT(&curmp, listq), 0,0,0);
			}
			pg->flags |= PG_WANTED;
			pg->flags &= ~PG_CLEAN;
			UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
			    "genput", 0);
			simple_lock(&uobj->vmobjlock);
			if (by_list) {
				UVMHIST_LOG(ubchist, "after next %p",
				    TAILQ_NEXT(&curmp, listq), 0,0,0);
				pg = TAILQ_NEXT(&curmp, listq);
				TAILQ_REMOVE(&uobj->memq, &curmp, listq);
			} else {
				pg = uvm_pagelookup(uobj, off);
			}
			continue;
		}

		/*
		 * if we're cleaning, build a cluster.
		 * the cluster will consist of pages which are currently dirty,
		 * but they will be returned to us marked clean.
		 * if not cleaning, just operate on the one page.
		 */

		if (needs_clean) {
			wasclean = FALSE;
			memset(pgs, 0, sizeof(pgs));
			pg->flags |= PG_BUSY;
			UVM_PAGE_OWN(pg, "genfs_putpages");

			/*
			 * first look backward.
			 */

			npages = MIN(n >> 1, off >> PAGE_SHIFT);
			nback = npages;
			uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0],
			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD);
			if (nback) {
				memmove(&pgs[0], &pgs[npages - nback],
				    nback * sizeof(pgs[0]));
			}
			n -= nback;

			/*
			 * then plug in our page of interest.
			 */

			pgs[nback] = pg;

			/*
			 * then look forward to fill in the remaining space in
			 * the array of pages.
			 */

			npages = MIN(n, (endoff - off) >> PAGE_SHIFT) - 1;
			uvn_findpages(uobj, off + PAGE_SIZE, &npages,
			    &pgs[nback + 1],
			    UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY);
			npages += nback + 1;
		} else {
			pgs[0] = pg;
			npages = 1;
		}

		/*
		 * apply FREE or DEACTIVATE options if requested.
		 */

		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
			uvm_lock_pageq();
		}
		for (i = 0; i < npages; i++) {
			tpg = pgs[i];
			KASSERT(tpg->uobject == uobj);
			if (flags & PGO_DEACTIVATE &&
			    (tpg->pqflags & PQ_INACTIVE) == 0 &&
			    tpg->wire_count == 0) {
				(void) pmap_clear_reference(tpg);
				uvm_pagedeactivate(tpg);
			} else if (flags & PGO_FREE) {
				pmap_page_protect(tpg, VM_PROT_NONE);
				if (tpg->flags & PG_BUSY) {
					tpg->flags |= freeflag;
					if (freeflag == PG_PAGEOUT) {
						uvmexp.paging++;
						uvm_pagedequeue(tpg);
					}
				} else {
					nextpg = TAILQ_NEXT(tpg, listq);
					uvm_pagefree(tpg);
				}
			}
		}
		if (flags & (PGO_DEACTIVATE|PGO_FREE)) {
			uvm_unlock_pageq();
		}
		if (needs_clean) {

			/*
			 * start the i/o.  if we're traversing by list,
			 * keep our place in the list with a marker page.
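			 * (the object lock is dropped around GOP_WRITE()
			 * since the i/o may sleep.)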
			 */

			if (by_list) {
				TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp,
				    listq);
			}
			simple_unlock(&uobj->vmobjlock);
			error = GOP_WRITE(vp, pgs, npages, flags);
			simple_lock(&uobj->vmobjlock);
			if (by_list) {
				pg = TAILQ_NEXT(&curmp, listq);
				TAILQ_REMOVE(&uobj->memq, &curmp, listq);
			}
			if (error == ENOMEM) {
				for (i = 0; i < npages; i++) {
					tpg = pgs[i];
					if (tpg->flags & PG_PAGEOUT) {
						tpg->flags &= ~PG_PAGEOUT;
						uvmexp.paging--;
					}
					tpg->flags &= ~PG_CLEAN;
					uvm_pageactivate(tpg);
				}
				uvm_page_unbusy(pgs, npages);
			}
			if (error) {
				break;
			}
			if (by_list) {
				continue;
			}
		}

		/*
		 * find the next page and continue if there was no error.
		 */

		if (by_list) {
			if (nextpg) {
				pg = nextpg;
				nextpg = NULL;
			} else {
				pg = TAILQ_NEXT(pg, listq);
			}
		} else {
			off += PAGE_SIZE;
			if (off < endoff) {
				pg = uvm_pagelookup(uobj, off);
			}
		}
	}
	if (by_list) {
		TAILQ_REMOVE(&uobj->memq, &endmp, listq);
		PRELE(curproc);
	}

	/*
	 * if we're cleaning and there was nothing to clean,
	 * take us off the syncer list.  if we started any i/o
	 * and we're doing sync i/o, wait for all writes to finish.
	 */

	if ((flags & PGO_CLEANIT) && wasclean &&
	    startoff == 0 && endoff == trunc_page(LLONG_MAX) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
	    (vp->v_flag & VONWORKLST)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	if (!wasclean && !async) {
		s = splbio();
		while (vp->v_numoutput != 0) {
			vp->v_flag |= VBWAIT;
			UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, &uobj->vmobjlock,
			    FALSE, "genput2",0);
			simple_lock(&uobj->vmobjlock);
		}
		splx(s);
	}
	simple_unlock(&uobj->vmobjlock);
	return error;
}

int
genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
{
	int s, error, run;
	int fs_bshift, dev_bshift;
	vaddr_t kva;
	off_t eof, offset, startoffset;
	size_t bytes, iobytes, skipbytes;
	daddr_t lbn, blkno;
	struct vm_page *pg;
	struct buf *mbp, *bp;
	struct vnode *devvp;
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	UVMHIST_FUNC("genfs_gop_write"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x",
	    vp, pgs, npages, flags);

	GOP_SIZE(vp, vp->v_size, &eof);
	if (vp->v_type == VREG) {
		fs_bshift = vp->v_mount->mnt_fs_bshift;
		dev_bshift = vp->v_mount->mnt_dev_bshift;
	} else {
		fs_bshift = DEV_BSHIFT;
		dev_bshift = DEV_BSHIFT;
	}
	error = 0;
	pg = pgs[0];
	startoffset = pg->offset;
	bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
	skipbytes = 0;
	KASSERT(bytes != 0);

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WRITE |
	    UVMPAGER_MAPIN_WAITOK);

	s = splbio();
	vp->v_numoutput += 2;
	mbp = pool_get(&bufpool, PR_WAITOK);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	splx(s);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_WRITE|B_AGE| (async ? B_CALL : 0);
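	/*
	 * b_iodone is only called via biodone() when B_CALL is set
	 * (the async case); for sync i/o we call uvm_aio_aiodone()
	 * ourselves after biowait() below.
	 */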
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	bp = NULL;
	for (offset = startoffset;
	    bytes > 0;
	    offset += iobytes, bytes -= iobytes) {
		lbn = offset >> fs_bshift;
		error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run);
		if (error) {
			UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
			skipbytes += bytes;
			bytes = 0;
			break;
		}

		iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
		    bytes);
		if (blkno == (daddr_t)-1) {
			skipbytes += iobytes;
			continue;
		}

		/* if it's really one i/o, don't make a second buf */
		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			vp->v_numoutput++;
			bp = pool_get(&bufpool, PR_WAITOK);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			splx(s);
			bp->b_data = (char *)kva +
			    (vaddr_t)(offset - pg->offset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_lblkno = 0;
		bp->b_private = mbp;
		if (devvp->v_type == VBLK) {
			bp->b_dev = devvp->v_rdev;
		}

		/* adjust physical blkno for partial blocks */
		bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
		    dev_bshift);
		UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    vp, offset, bp->b_bcount, bp->b_blkno);
		VOP_STRATEGY(bp);
	}
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
		s = splbio();
		if (error) {
			mbp->b_flags |= B_ERROR;
			mbp->b_error = error;
		}
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	if (async) {
		UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0);
		return 0;
	}
	UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
	error = biowait(mbp);
	uvm_aio_aiodone(mbp);
	UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
	return error;
}

void
genfs_node_init(struct vnode *vp, struct genfs_ops *ops)
{
	struct genfs_node *gp = VTOG(vp);

	lockinit(&gp->g_glock, PINOD, "glock", 0, 0);
	gp->g_op = ops;
}

void
genfs_size(struct vnode *vp, off_t size, off_t *eobp)
{
	int bsize;

	bsize = 1 << vp->v_mount->mnt_fs_bshift;
	*eobp = (size + bsize - 1) & ~(bsize - 1);
}