1 /* $NetBSD: genfs_vnops.c,v 1.136 2006/10/14 09:16:28 yamt Exp $ */ 2 3 /* 4 * Copyright (c) 1982, 1986, 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 __KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.136 2006/10/14 09:16:28 yamt Exp $"); 35 36 #if defined(_KERNEL_OPT) 37 #include "opt_nfsserver.h" 38 #endif 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/proc.h> 43 #include <sys/kernel.h> 44 #include <sys/mount.h> 45 #include <sys/namei.h> 46 #include <sys/vnode.h> 47 #include <sys/fcntl.h> 48 #include <sys/kmem.h> 49 #include <sys/poll.h> 50 #include <sys/mman.h> 51 #include <sys/file.h> 52 #include <sys/kauth.h> 53 54 #include <miscfs/genfs/genfs.h> 55 #include <miscfs/genfs/genfs_node.h> 56 #include <miscfs/specfs/specdev.h> 57 58 #include <uvm/uvm.h> 59 #include <uvm/uvm_pager.h> 60 61 #ifdef NFSSERVER 62 #include <nfs/rpcv2.h> 63 #include <nfs/nfsproto.h> 64 #include <nfs/nfs.h> 65 #include <nfs/nqnfs.h> 66 #include <nfs/nfs_var.h> 67 #endif 68 69 static int genfs_do_directio(struct vmspace *, vaddr_t, size_t, struct vnode *, 70 off_t, enum uio_rw); 71 static void genfs_dio_iodone(struct buf *); 72 73 static int genfs_do_io(struct vnode *, off_t, vaddr_t, size_t, int, enum uio_rw, 74 void (*)(struct buf *)); 75 static inline void genfs_rel_pages(struct vm_page **, int); 76 static void filt_genfsdetach(struct knote *); 77 static int filt_genfsread(struct knote *, long); 78 static int filt_genfsvnode(struct knote *, long); 79 80 #define MAX_READ_PAGES 16 /* XXXUBC 16 */ 81 82 int genfs_maxdio = MAXPHYS; 83 84 int 85 genfs_poll(void *v) 86 { 87 struct vop_poll_args /* { 88 struct vnode *a_vp; 89 int a_events; 90 struct lwp *a_l; 91 } */ *ap = v; 92 93 return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 94 } 95 96 int 97 genfs_seek(void *v) 98 { 99 struct vop_seek_args /* { 100 struct vnode *a_vp; 101 off_t a_oldoff; 102 off_t a_newoff; 103 kauth_cred_t cred; 104 } */ *ap = v; 105 106 if (ap->a_newoff < 0) 107 return (EINVAL); 108 109 return (0); 110 } 111 112 int 113 genfs_abortop(void *v) 114 { 115 struct vop_abortop_args /* { 116 struct vnode *a_dvp; 117 struct componentname *a_cnp; 118 } */ *ap = v; 119 120 if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) 121 PNBUF_PUT(ap->a_cnp->cn_pnbuf); 122 return (0); 123 } 124 125 int 126 genfs_fcntl(void *v) 127 { 128 struct vop_fcntl_args /* { 129 struct vnode *a_vp; 130 u_int a_command; 131 caddr_t a_data; 132 int a_fflag; 133 kauth_cred_t a_cred; 134 struct lwp *a_l; 135 } */ *ap = v; 136 137 if (ap->a_command == F_SETFL) 138 return (0); 139 else 140 return (EOPNOTSUPP); 141 } 142 143 /*ARGSUSED*/ 144 int 145 genfs_badop(void *v __unused) 146 { 147 148 panic("genfs: bad op"); 149 } 150 151 /*ARGSUSED*/ 152 int 153 genfs_nullop(void *v __unused) 154 { 155 156 return (0); 157 } 158 159 /*ARGSUSED*/ 160 int 161 genfs_einval(void *v __unused) 162 { 163 164 return (EINVAL); 165 } 166 167 /* 168 * Called when an fs doesn't support a particular vop. 169 * This takes care to vrele, vput, or vunlock passed in vnodes. 170 */ 171 int 172 genfs_eopnotsupp(void *v) 173 { 174 struct vop_generic_args /* 175 struct vnodeop_desc *a_desc; 176 / * other random data follows, presumably * / 177 } */ *ap = v; 178 struct vnodeop_desc *desc = ap->a_desc; 179 struct vnode *vp, *vp_last = NULL; 180 int flags, i, j, offset; 181 182 flags = desc->vdesc_flags; 183 for (i = 0; i < VDESC_MAX_VPS; flags >>=1, i++) { 184 if ((offset = desc->vdesc_vp_offsets[i]) == VDESC_NO_OFFSET) 185 break; /* stop at end of list */ 186 if ((j = flags & VDESC_VP0_WILLPUT)) { 187 vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap); 188 189 /* Skip if NULL */ 190 if (!vp) 191 continue; 192 193 switch (j) { 194 case VDESC_VP0_WILLPUT: 195 /* Check for dvp == vp cases */ 196 if (vp == vp_last) 197 vrele(vp); 198 else { 199 vput(vp); 200 vp_last = vp; 201 } 202 break; 203 case VDESC_VP0_WILLUNLOCK: 204 VOP_UNLOCK(vp, 0); 205 break; 206 case VDESC_VP0_WILLRELE: 207 vrele(vp); 208 break; 209 } 210 } 211 } 212 213 return (EOPNOTSUPP); 214 } 215 216 /*ARGSUSED*/ 217 int 218 genfs_ebadf(void *v __unused) 219 { 220 221 return (EBADF); 222 } 223 224 /* ARGSUSED */ 225 int 226 genfs_enoioctl(void *v __unused) 227 { 228 229 return (EPASSTHROUGH); 230 } 231 232 233 /* 234 * Eliminate all activity associated with the requested vnode 235 * and with all vnodes aliased to the requested vnode. 236 */ 237 int 238 genfs_revoke(void *v) 239 { 240 struct vop_revoke_args /* { 241 struct vnode *a_vp; 242 int a_flags; 243 } */ *ap = v; 244 struct vnode *vp, *vq; 245 struct lwp *l = curlwp; /* XXX */ 246 247 #ifdef DIAGNOSTIC 248 if ((ap->a_flags & REVOKEALL) == 0) 249 panic("genfs_revoke: not revokeall"); 250 #endif 251 252 vp = ap->a_vp; 253 simple_lock(&vp->v_interlock); 254 255 if (vp->v_flag & VALIASED) { 256 /* 257 * If a vgone (or vclean) is already in progress, 258 * wait until it is done and return. 259 */ 260 if (vp->v_flag & VXLOCK) { 261 vp->v_flag |= VXWANT; 262 ltsleep(vp, PINOD|PNORELOCK, "vop_revokeall", 0, 263 &vp->v_interlock); 264 return (0); 265 } 266 /* 267 * Ensure that vp will not be vgone'd while we 268 * are eliminating its aliases. 269 */ 270 vp->v_flag |= VXLOCK; 271 simple_unlock(&vp->v_interlock); 272 while (vp->v_flag & VALIASED) { 273 simple_lock(&spechash_slock); 274 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 275 if (vq->v_rdev != vp->v_rdev || 276 vq->v_type != vp->v_type || vp == vq) 277 continue; 278 simple_unlock(&spechash_slock); 279 vgone(vq); 280 break; 281 } 282 if (vq == NULLVP) 283 simple_unlock(&spechash_slock); 284 } 285 /* 286 * Remove the lock so that vgone below will 287 * really eliminate the vnode after which time 288 * vgone will awaken any sleepers. 289 */ 290 simple_lock(&vp->v_interlock); 291 vp->v_flag &= ~VXLOCK; 292 } 293 vgonel(vp, l); 294 return (0); 295 } 296 297 /* 298 * Lock the node. 299 */ 300 int 301 genfs_lock(void *v) 302 { 303 struct vop_lock_args /* { 304 struct vnode *a_vp; 305 int a_flags; 306 } */ *ap = v; 307 struct vnode *vp = ap->a_vp; 308 309 return (lockmgr(vp->v_vnlock, ap->a_flags, &vp->v_interlock)); 310 } 311 312 /* 313 * Unlock the node. 314 */ 315 int 316 genfs_unlock(void *v) 317 { 318 struct vop_unlock_args /* { 319 struct vnode *a_vp; 320 int a_flags; 321 } */ *ap = v; 322 struct vnode *vp = ap->a_vp; 323 324 return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, 325 &vp->v_interlock)); 326 } 327 328 /* 329 * Return whether or not the node is locked. 330 */ 331 int 332 genfs_islocked(void *v) 333 { 334 struct vop_islocked_args /* { 335 struct vnode *a_vp; 336 } */ *ap = v; 337 struct vnode *vp = ap->a_vp; 338 339 return (lockstatus(vp->v_vnlock)); 340 } 341 342 /* 343 * Stubs to use when there is no locking to be done on the underlying object. 344 */ 345 int 346 genfs_nolock(void *v) 347 { 348 struct vop_lock_args /* { 349 struct vnode *a_vp; 350 int a_flags; 351 struct lwp *a_l; 352 } */ *ap = v; 353 354 /* 355 * Since we are not using the lock manager, we must clear 356 * the interlock here. 357 */ 358 if (ap->a_flags & LK_INTERLOCK) 359 simple_unlock(&ap->a_vp->v_interlock); 360 return (0); 361 } 362 363 int 364 genfs_nounlock(void *v __unused) 365 { 366 367 return (0); 368 } 369 370 int 371 genfs_noislocked(void *v __unused) 372 { 373 374 return (0); 375 } 376 377 /* 378 * Local lease check for NFS servers. Just set up args and let 379 * nqsrv_getlease() do the rest. If NFSSERVER is not in the kernel, 380 * this is a null operation. 381 */ 382 int 383 genfs_lease_check(void *v) 384 { 385 #ifdef NFSSERVER 386 struct vop_lease_args /* { 387 struct vnode *a_vp; 388 struct lwp *a_l; 389 kauth_cred_t a_cred; 390 int a_flag; 391 } */ *ap = v; 392 u_int32_t duration = 0; 393 int cache; 394 u_quad_t frev; 395 396 (void) nqsrv_getlease(ap->a_vp, &duration, ND_CHECK | ap->a_flag, 397 NQLOCALSLP, ap->a_l, (struct mbuf *)0, &cache, &frev, ap->a_cred); 398 return (0); 399 #else 400 (void) v; 401 return (0); 402 #endif /* NFSSERVER */ 403 } 404 405 int 406 genfs_mmap(void *v __unused) 407 { 408 409 return (0); 410 } 411 412 static inline void 413 genfs_rel_pages(struct vm_page **pgs, int npages) 414 { 415 int i; 416 417 for (i = 0; i < npages; i++) { 418 struct vm_page *pg = pgs[i]; 419 420 if (pg == NULL || pg == PGO_DONTCARE) 421 continue; 422 if (pg->flags & PG_FAKE) { 423 pg->flags |= PG_RELEASED; 424 } 425 } 426 uvm_lock_pageq(); 427 uvm_page_unbusy(pgs, npages); 428 uvm_unlock_pageq(); 429 } 430 431 /* 432 * generic VM getpages routine. 433 * Return PG_BUSY pages for the given range, 434 * reading from backing store if necessary. 435 */ 436 437 int 438 genfs_getpages(void *v) 439 { 440 struct vop_getpages_args /* { 441 struct vnode *a_vp; 442 voff_t a_offset; 443 struct vm_page **a_m; 444 int *a_count; 445 int a_centeridx; 446 vm_prot_t a_access_type; 447 int a_advice; 448 int a_flags; 449 } */ *ap = v; 450 451 off_t newsize, diskeof, memeof; 452 off_t offset, origoffset, startoffset, endoffset; 453 daddr_t lbn, blkno; 454 int i, error, npages, orignpages, npgs, run, ridx, pidx, pcount; 455 int fs_bshift, fs_bsize, dev_bshift; 456 int flags = ap->a_flags; 457 size_t bytes, iobytes, tailbytes, totalbytes, skipbytes; 458 vaddr_t kva; 459 struct buf *bp, *mbp; 460 struct vnode *vp = ap->a_vp; 461 struct vnode *devvp; 462 struct genfs_node *gp = VTOG(vp); 463 struct uvm_object *uobj = &vp->v_uobj; 464 struct vm_page *pg, **pgs, *pgs_onstack[MAX_READ_PAGES]; 465 int pgs_size; 466 kauth_cred_t cred = curlwp->l_cred; /* XXXUBC curlwp */ 467 boolean_t async = (flags & PGO_SYNCIO) == 0; 468 boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0; 469 boolean_t sawhole = FALSE; 470 boolean_t overwrite = (flags & PGO_OVERWRITE) != 0; 471 boolean_t blockalloc = write && (flags & PGO_NOBLOCKALLOC) == 0; 472 voff_t origvsize; 473 UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist); 474 475 UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d", 476 vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count); 477 478 KASSERT(vp->v_type == VREG || vp->v_type == VDIR || 479 vp->v_type == VLNK || vp->v_type == VBLK); 480 481 /* XXXUBC temp limit */ 482 if (*ap->a_count > MAX_READ_PAGES) { 483 panic("genfs_getpages: too many pages"); 484 } 485 486 startover: 487 error = 0; 488 origvsize = vp->v_size; 489 origoffset = ap->a_offset; 490 orignpages = *ap->a_count; 491 GOP_SIZE(vp, vp->v_size, &diskeof, 0); 492 if (flags & PGO_PASTEOF) { 493 newsize = MAX(vp->v_size, 494 origoffset + (orignpages << PAGE_SHIFT)); 495 GOP_SIZE(vp, newsize, &memeof, GOP_SIZE_MEM); 496 } else { 497 GOP_SIZE(vp, vp->v_size, &memeof, GOP_SIZE_MEM); 498 } 499 KASSERT(ap->a_centeridx >= 0 || ap->a_centeridx <= orignpages); 500 KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0); 501 KASSERT(orignpages > 0); 502 503 /* 504 * Bounds-check the request. 505 */ 506 507 if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) { 508 if ((flags & PGO_LOCKED) == 0) { 509 simple_unlock(&uobj->vmobjlock); 510 } 511 UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x", 512 origoffset, *ap->a_count, memeof,0); 513 return (EINVAL); 514 } 515 516 /* uobj is locked */ 517 518 if ((flags & PGO_NOTIMESTAMP) == 0 && 519 (vp->v_type != VBLK || 520 (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) { 521 int updflags = 0; 522 523 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0) { 524 updflags = GOP_UPDATE_ACCESSED; 525 } 526 if (write) { 527 updflags |= GOP_UPDATE_MODIFIED; 528 } 529 if (updflags != 0) { 530 GOP_MARKUPDATE(vp, updflags); 531 } 532 } 533 534 if (write) { 535 gp->g_dirtygen++; 536 if ((vp->v_flag & VONWORKLST) == 0) { 537 vn_syncer_add_to_worklist(vp, filedelay); 538 } 539 if ((vp->v_flag & (VWRITEMAP|VWRITEMAPDIRTY)) == VWRITEMAP) { 540 vp->v_flag |= VWRITEMAPDIRTY; 541 } 542 } 543 544 /* 545 * For PGO_LOCKED requests, just return whatever's in memory. 546 */ 547 548 if (flags & PGO_LOCKED) { 549 int nfound; 550 551 npages = *ap->a_count; 552 #if defined(DEBUG) 553 for (i = 0; i < npages; i++) { 554 pg = ap->a_m[i]; 555 KASSERT(pg == NULL || pg == PGO_DONTCARE); 556 } 557 #endif /* defined(DEBUG) */ 558 nfound = uvn_findpages(uobj, origoffset, &npages, 559 ap->a_m, UFP_NOWAIT|UFP_NOALLOC|(write ? UFP_NORDONLY : 0)); 560 KASSERT(npages == *ap->a_count); 561 if (nfound == 0) { 562 return EBUSY; 563 } 564 if (lockmgr(&gp->g_glock, LK_SHARED | LK_NOWAIT, NULL)) { 565 genfs_rel_pages(ap->a_m, npages); 566 567 /* 568 * restore the array. 569 */ 570 571 for (i = 0; i < npages; i++) { 572 pg = ap->a_m[i]; 573 574 if (pg != NULL || pg != PGO_DONTCARE) { 575 ap->a_m[i] = NULL; 576 } 577 } 578 } else { 579 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 580 } 581 return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0); 582 } 583 simple_unlock(&uobj->vmobjlock); 584 585 /* 586 * find the requested pages and make some simple checks. 587 * leave space in the page array for a whole block. 588 */ 589 590 if (vp->v_type != VBLK) { 591 fs_bshift = vp->v_mount->mnt_fs_bshift; 592 dev_bshift = vp->v_mount->mnt_dev_bshift; 593 } else { 594 fs_bshift = DEV_BSHIFT; 595 dev_bshift = DEV_BSHIFT; 596 } 597 fs_bsize = 1 << fs_bshift; 598 599 orignpages = MIN(orignpages, 600 round_page(memeof - origoffset) >> PAGE_SHIFT); 601 npages = orignpages; 602 startoffset = origoffset & ~(fs_bsize - 1); 603 endoffset = round_page((origoffset + (npages << PAGE_SHIFT) + 604 fs_bsize - 1) & ~(fs_bsize - 1)); 605 endoffset = MIN(endoffset, round_page(memeof)); 606 ridx = (origoffset - startoffset) >> PAGE_SHIFT; 607 608 pgs_size = sizeof(struct vm_page *) * 609 ((endoffset - startoffset) >> PAGE_SHIFT); 610 if (pgs_size > sizeof(pgs_onstack)) { 611 pgs = kmem_zalloc(pgs_size, async ? KM_NOSLEEP : KM_SLEEP); 612 if (pgs == NULL) { 613 return (ENOMEM); 614 } 615 } else { 616 pgs = pgs_onstack; 617 memset(pgs, 0, pgs_size); 618 } 619 UVMHIST_LOG(ubchist, "ridx %d npages %d startoff %ld endoff %ld", 620 ridx, npages, startoffset, endoffset); 621 622 /* 623 * hold g_glock to prevent a race with truncate. 624 * 625 * check if our idea of v_size is still valid. 626 */ 627 628 if (blockalloc) { 629 lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL); 630 } else { 631 lockmgr(&gp->g_glock, LK_SHARED, NULL); 632 } 633 simple_lock(&uobj->vmobjlock); 634 if (vp->v_size < origvsize) { 635 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 636 if (pgs != pgs_onstack) 637 kmem_free(pgs, pgs_size); 638 goto startover; 639 } 640 641 if (uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], 642 async ? UFP_NOWAIT : UFP_ALL) != orignpages) { 643 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 644 KASSERT(async != 0); 645 genfs_rel_pages(&pgs[ridx], orignpages); 646 simple_unlock(&uobj->vmobjlock); 647 if (pgs != pgs_onstack) 648 kmem_free(pgs, pgs_size); 649 return (EBUSY); 650 } 651 652 /* 653 * if the pages are already resident, just return them. 654 */ 655 656 for (i = 0; i < npages; i++) { 657 struct vm_page *pg1 = pgs[ridx + i]; 658 659 if ((pg1->flags & PG_FAKE) || 660 (blockalloc && (pg1->flags & PG_RDONLY))) { 661 break; 662 } 663 } 664 if (i == npages) { 665 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 666 UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0); 667 npages += ridx; 668 goto out; 669 } 670 671 /* 672 * if PGO_OVERWRITE is set, don't bother reading the pages. 673 */ 674 675 if (overwrite) { 676 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 677 UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0); 678 679 for (i = 0; i < npages; i++) { 680 struct vm_page *pg1 = pgs[ridx + i]; 681 682 pg1->flags &= ~(PG_RDONLY|PG_CLEAN); 683 } 684 npages += ridx; 685 goto out; 686 } 687 688 /* 689 * the page wasn't resident and we're not overwriting, 690 * so we're going to have to do some i/o. 691 * find any additional pages needed to cover the expanded range. 692 */ 693 694 npages = (endoffset - startoffset) >> PAGE_SHIFT; 695 if (startoffset != origoffset || npages != orignpages) { 696 697 /* 698 * we need to avoid deadlocks caused by locking 699 * additional pages at lower offsets than pages we 700 * already have locked. unlock them all and start over. 701 */ 702 703 genfs_rel_pages(&pgs[ridx], orignpages); 704 memset(pgs, 0, pgs_size); 705 706 UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x", 707 startoffset, endoffset, 0,0); 708 npgs = npages; 709 if (uvn_findpages(uobj, startoffset, &npgs, pgs, 710 async ? UFP_NOWAIT : UFP_ALL) != npages) { 711 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 712 KASSERT(async != 0); 713 genfs_rel_pages(pgs, npages); 714 simple_unlock(&uobj->vmobjlock); 715 if (pgs != pgs_onstack) 716 kmem_free(pgs, pgs_size); 717 return (EBUSY); 718 } 719 } 720 simple_unlock(&uobj->vmobjlock); 721 722 /* 723 * read the desired page(s). 724 */ 725 726 totalbytes = npages << PAGE_SHIFT; 727 bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0)); 728 tailbytes = totalbytes - bytes; 729 skipbytes = 0; 730 731 kva = uvm_pagermapin(pgs, npages, 732 UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK); 733 734 mbp = getiobuf(); 735 mbp->b_bufsize = totalbytes; 736 mbp->b_data = (void *)kva; 737 mbp->b_resid = mbp->b_bcount = bytes; 738 mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0); 739 mbp->b_iodone = (async ? uvm_aio_biodone : 0); 740 mbp->b_vp = vp; 741 if (async) 742 BIO_SETPRIO(mbp, BPRIO_TIMELIMITED); 743 else 744 BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL); 745 746 /* 747 * if EOF is in the middle of the range, zero the part past EOF. 748 * if the page including EOF is not PG_FAKE, skip over it since 749 * in that case it has valid data that we need to preserve. 750 */ 751 752 if (tailbytes > 0) { 753 size_t tailstart = bytes; 754 755 if ((pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE) == 0) { 756 tailstart = round_page(tailstart); 757 tailbytes -= tailstart - bytes; 758 } 759 UVMHIST_LOG(ubchist, "tailbytes %p 0x%x 0x%x", 760 kva, tailstart, tailbytes,0); 761 memset((void *)(kva + tailstart), 0, tailbytes); 762 } 763 764 /* 765 * now loop over the pages, reading as needed. 766 */ 767 768 bp = NULL; 769 for (offset = startoffset; 770 bytes > 0; 771 offset += iobytes, bytes -= iobytes) { 772 773 /* 774 * skip pages which don't need to be read. 775 */ 776 777 pidx = (offset - startoffset) >> PAGE_SHIFT; 778 while ((pgs[pidx]->flags & PG_FAKE) == 0) { 779 size_t b; 780 781 KASSERT((offset & (PAGE_SIZE - 1)) == 0); 782 if ((pgs[pidx]->flags & PG_RDONLY)) { 783 sawhole = TRUE; 784 } 785 b = MIN(PAGE_SIZE, bytes); 786 offset += b; 787 bytes -= b; 788 skipbytes += b; 789 pidx++; 790 UVMHIST_LOG(ubchist, "skipping, new offset 0x%x", 791 offset, 0,0,0); 792 if (bytes == 0) { 793 goto loopdone; 794 } 795 } 796 797 /* 798 * bmap the file to find out the blkno to read from and 799 * how much we can read in one i/o. if bmap returns an error, 800 * skip the rest of the top-level i/o. 801 */ 802 803 lbn = offset >> fs_bshift; 804 error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run); 805 if (error) { 806 UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n", 807 lbn, error,0,0); 808 skipbytes += bytes; 809 goto loopdone; 810 } 811 812 /* 813 * see how many pages can be read with this i/o. 814 * reduce the i/o size if necessary to avoid 815 * overwriting pages with valid data. 816 */ 817 818 iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset, 819 bytes); 820 if (offset + iobytes > round_page(offset)) { 821 pcount = 1; 822 while (pidx + pcount < npages && 823 pgs[pidx + pcount]->flags & PG_FAKE) { 824 pcount++; 825 } 826 iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) - 827 (offset - trunc_page(offset))); 828 } 829 830 /* 831 * if this block isn't allocated, zero it instead of 832 * reading it. unless we are going to allocate blocks, 833 * mark the pages we zeroed PG_RDONLY. 834 */ 835 836 if (blkno < 0) { 837 int holepages = (round_page(offset + iobytes) - 838 trunc_page(offset)) >> PAGE_SHIFT; 839 UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0); 840 841 sawhole = TRUE; 842 memset((char *)kva + (offset - startoffset), 0, 843 iobytes); 844 skipbytes += iobytes; 845 846 for (i = 0; i < holepages; i++) { 847 if (write) { 848 pgs[pidx + i]->flags &= ~PG_CLEAN; 849 } 850 if (!blockalloc) { 851 pgs[pidx + i]->flags |= PG_RDONLY; 852 } 853 } 854 continue; 855 } 856 857 /* 858 * allocate a sub-buf for this piece of the i/o 859 * (or just use mbp if there's only 1 piece), 860 * and start it going. 861 */ 862 863 if (offset == startoffset && iobytes == bytes) { 864 bp = mbp; 865 } else { 866 bp = getiobuf(); 867 nestiobuf_setup(mbp, bp, offset - startoffset, iobytes); 868 } 869 bp->b_lblkno = 0; 870 871 /* adjust physical blkno for partial blocks */ 872 bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >> 873 dev_bshift); 874 875 UVMHIST_LOG(ubchist, 876 "bp %p offset 0x%x bcount 0x%x blkno 0x%x", 877 bp, offset, iobytes, bp->b_blkno); 878 879 VOP_STRATEGY(devvp, bp); 880 } 881 882 loopdone: 883 nestiobuf_done(mbp, skipbytes, error); 884 if (async) { 885 UVMHIST_LOG(ubchist, "returning 0 (async)",0,0,0,0); 886 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 887 if (pgs != pgs_onstack) 888 kmem_free(pgs, pgs_size); 889 return (0); 890 } 891 if (bp != NULL) { 892 error = biowait(mbp); 893 } 894 putiobuf(mbp); 895 uvm_pagermapout(kva, npages); 896 897 /* 898 * if this we encountered a hole then we have to do a little more work. 899 * for read faults, we marked the page PG_RDONLY so that future 900 * write accesses to the page will fault again. 901 * for write faults, we must make sure that the backing store for 902 * the page is completely allocated while the pages are locked. 903 */ 904 905 if (!error && sawhole && blockalloc) { 906 error = GOP_ALLOC(vp, startoffset, npages << PAGE_SHIFT, 0, 907 cred); 908 UVMHIST_LOG(ubchist, "gop_alloc off 0x%x/0x%x -> %d", 909 startoffset, npages << PAGE_SHIFT, error,0); 910 if (!error) { 911 for (i = 0; i < npages; i++) { 912 if (pgs[i] == NULL) { 913 continue; 914 } 915 pgs[i]->flags &= ~(PG_CLEAN|PG_RDONLY); 916 UVMHIST_LOG(ubchist, "mark dirty pg %p", 917 pgs[i],0,0,0); 918 } 919 } 920 } 921 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 922 simple_lock(&uobj->vmobjlock); 923 924 /* 925 * we're almost done! release the pages... 926 * for errors, we free the pages. 927 * otherwise we activate them and mark them as valid and clean. 928 * also, unbusy pages that were not actually requested. 929 */ 930 931 if (error) { 932 for (i = 0; i < npages; i++) { 933 if (pgs[i] == NULL) { 934 continue; 935 } 936 UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", 937 pgs[i], pgs[i]->flags, 0,0); 938 if (pgs[i]->flags & PG_FAKE) { 939 pgs[i]->flags |= PG_RELEASED; 940 } 941 } 942 uvm_lock_pageq(); 943 uvm_page_unbusy(pgs, npages); 944 uvm_unlock_pageq(); 945 simple_unlock(&uobj->vmobjlock); 946 UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0); 947 if (pgs != pgs_onstack) 948 kmem_free(pgs, pgs_size); 949 return (error); 950 } 951 952 out: 953 UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0); 954 uvm_lock_pageq(); 955 for (i = 0; i < npages; i++) { 956 pg = pgs[i]; 957 if (pg == NULL) { 958 continue; 959 } 960 UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x", 961 pg, pg->flags, 0,0); 962 if (pg->flags & PG_FAKE && !overwrite) { 963 pg->flags &= ~(PG_FAKE); 964 pmap_clear_modify(pgs[i]); 965 } 966 KASSERT(!write || !blockalloc || (pg->flags & PG_RDONLY) == 0); 967 if (i < ridx || i >= ridx + orignpages || async) { 968 UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x", 969 pg, pg->offset,0,0); 970 if (pg->flags & PG_WANTED) { 971 wakeup(pg); 972 } 973 if (pg->flags & PG_FAKE) { 974 KASSERT(overwrite); 975 uvm_pagezero(pg); 976 } 977 if (pg->flags & PG_RELEASED) { 978 uvm_pagefree(pg); 979 continue; 980 } 981 uvm_pageenqueue(pg); 982 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); 983 UVM_PAGE_OWN(pg, NULL); 984 } 985 } 986 uvm_unlock_pageq(); 987 simple_unlock(&uobj->vmobjlock); 988 if (ap->a_m != NULL) { 989 memcpy(ap->a_m, &pgs[ridx], 990 orignpages * sizeof(struct vm_page *)); 991 } 992 if (pgs != pgs_onstack) 993 kmem_free(pgs, pgs_size); 994 return (0); 995 } 996 997 /* 998 * generic VM putpages routine. 999 * Write the given range of pages to backing store. 1000 * 1001 * => "offhi == 0" means flush all pages at or after "offlo". 1002 * => object should be locked by caller. we may _unlock_ the object 1003 * if (and only if) we need to clean a page (PGO_CLEANIT), or 1004 * if PGO_SYNCIO is set and there are pages busy. 1005 * we return with the object locked. 1006 * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O). 1007 * thus, a caller might want to unlock higher level resources 1008 * (e.g. vm_map) before calling flush. 1009 * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither 1010 * unlock the object nor block. 1011 * => if PGO_ALLPAGES is set, then all pages in the object will be processed. 1012 * => NOTE: we rely on the fact that the object's memq is a TAILQ and 1013 * that new pages are inserted on the tail end of the list. thus, 1014 * we can make a complete pass through the object in one go by starting 1015 * at the head and working towards the tail (new pages are put in 1016 * front of us). 1017 * => NOTE: we are allowed to lock the page queues, so the caller 1018 * must not be holding the page queue lock. 1019 * 1020 * note on "cleaning" object and PG_BUSY pages: 1021 * this routine is holding the lock on the object. the only time 1022 * that it can run into a PG_BUSY page that it does not own is if 1023 * some other process has started I/O on the page (e.g. either 1024 * a pagein, or a pageout). if the PG_BUSY page is being paged 1025 * in, then it can not be dirty (!PG_CLEAN) because no one has 1026 * had a chance to modify it yet. if the PG_BUSY page is being 1027 * paged out then it means that someone else has already started 1028 * cleaning the page for us (how nice!). in this case, if we 1029 * have syncio specified, then after we make our pass through the 1030 * object we need to wait for the other PG_BUSY pages to clear 1031 * off (i.e. we need to do an iosync). also note that once a 1032 * page is PG_BUSY it must stay in its object until it is un-busyed. 1033 * 1034 * note on page traversal: 1035 * we can traverse the pages in an object either by going down the 1036 * linked list in "uobj->memq", or we can go over the address range 1037 * by page doing hash table lookups for each address. depending 1038 * on how many pages are in the object it may be cheaper to do one 1039 * or the other. we set "by_list" to true if we are using memq. 1040 * if the cost of a hash lookup was equal to the cost of the list 1041 * traversal we could compare the number of pages in the start->stop 1042 * range to the total number of pages in the object. however, it 1043 * seems that a hash table lookup is more expensive than the linked 1044 * list traversal, so we multiply the number of pages in the 1045 * range by an estimate of the relatively higher cost of the hash lookup. 1046 */ 1047 1048 int 1049 genfs_putpages(void *v) 1050 { 1051 struct vop_putpages_args /* { 1052 struct vnode *a_vp; 1053 voff_t a_offlo; 1054 voff_t a_offhi; 1055 int a_flags; 1056 } */ *ap = v; 1057 struct vnode *vp = ap->a_vp; 1058 struct uvm_object *uobj = &vp->v_uobj; 1059 struct simplelock *slock = &uobj->vmobjlock; 1060 off_t startoff = ap->a_offlo; 1061 off_t endoff = ap->a_offhi; 1062 off_t off; 1063 int flags = ap->a_flags; 1064 /* Even for strange MAXPHYS, the shift rounds down to a page */ 1065 const int maxpages = MAXPHYS >> PAGE_SHIFT; 1066 int i, s, error, npages, nback; 1067 int freeflag; 1068 struct vm_page *pgs[maxpages], *pg, *nextpg, *tpg, curmp, endmp; 1069 boolean_t wasclean, by_list, needs_clean, yld; 1070 boolean_t async = (flags & PGO_SYNCIO) == 0; 1071 boolean_t pagedaemon = curproc == uvm.pagedaemon_proc; 1072 struct lwp *l = curlwp ? curlwp : &lwp0; 1073 struct genfs_node *gp = VTOG(vp); 1074 int dirtygen; 1075 boolean_t modified = FALSE; 1076 boolean_t cleanall; 1077 1078 UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist); 1079 1080 KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)); 1081 KASSERT((startoff & PAGE_MASK) == 0 && (endoff & PAGE_MASK) == 0); 1082 KASSERT(startoff < endoff || endoff == 0); 1083 1084 UVMHIST_LOG(ubchist, "vp %p pages %d off 0x%x len 0x%x", 1085 vp, uobj->uo_npages, startoff, endoff - startoff); 1086 1087 KASSERT((vp->v_flag & VONWORKLST) != 0 || 1088 (vp->v_flag & VWRITEMAPDIRTY) == 0); 1089 if (uobj->uo_npages == 0) { 1090 s = splbio(); 1091 if (vp->v_flag & VONWORKLST) { 1092 vp->v_flag &= ~VWRITEMAPDIRTY; 1093 if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 1094 vp->v_flag &= ~VONWORKLST; 1095 LIST_REMOVE(vp, v_synclist); 1096 } 1097 } 1098 splx(s); 1099 simple_unlock(slock); 1100 return (0); 1101 } 1102 1103 /* 1104 * the vnode has pages, set up to process the request. 1105 */ 1106 1107 error = 0; 1108 s = splbio(); 1109 simple_lock(&global_v_numoutput_slock); 1110 wasclean = (vp->v_numoutput == 0); 1111 simple_unlock(&global_v_numoutput_slock); 1112 splx(s); 1113 off = startoff; 1114 if (endoff == 0 || flags & PGO_ALLPAGES) { 1115 endoff = trunc_page(LLONG_MAX); 1116 } 1117 by_list = (uobj->uo_npages <= 1118 ((endoff - startoff) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY); 1119 1120 #if !defined(DEBUG) 1121 /* 1122 * if this vnode is known not to have dirty pages, 1123 * don't bother to clean it out. 1124 */ 1125 1126 if ((vp->v_flag & VONWORKLST) == 0) { 1127 if ((flags & (PGO_FREE|PGO_DEACTIVATE)) == 0) { 1128 goto skip_scan; 1129 } 1130 flags &= ~PGO_CLEANIT; 1131 } 1132 #endif /* !defined(DEBUG) */ 1133 1134 /* 1135 * start the loop. when scanning by list, hold the last page 1136 * in the list before we start. pages allocated after we start 1137 * will be added to the end of the list, so we can stop at the 1138 * current last page. 1139 */ 1140 1141 cleanall = (flags & PGO_CLEANIT) != 0 && wasclean && 1142 startoff == 0 && endoff == trunc_page(LLONG_MAX) && 1143 (vp->v_flag & VONWORKLST) != 0; 1144 dirtygen = gp->g_dirtygen; 1145 freeflag = pagedaemon ? PG_PAGEOUT : PG_RELEASED; 1146 if (by_list) { 1147 curmp.uobject = uobj; 1148 curmp.offset = (voff_t)-1; 1149 curmp.flags = PG_BUSY; 1150 endmp.uobject = uobj; 1151 endmp.offset = (voff_t)-1; 1152 endmp.flags = PG_BUSY; 1153 pg = TAILQ_FIRST(&uobj->memq); 1154 TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq); 1155 PHOLD(l); 1156 } else { 1157 pg = uvm_pagelookup(uobj, off); 1158 } 1159 nextpg = NULL; 1160 while (by_list || off < endoff) { 1161 1162 /* 1163 * if the current page is not interesting, move on to the next. 1164 */ 1165 1166 KASSERT(pg == NULL || pg->uobject == uobj); 1167 KASSERT(pg == NULL || 1168 (pg->flags & (PG_RELEASED|PG_PAGEOUT)) == 0 || 1169 (pg->flags & PG_BUSY) != 0); 1170 if (by_list) { 1171 if (pg == &endmp) { 1172 break; 1173 } 1174 if (pg->offset < startoff || pg->offset >= endoff || 1175 pg->flags & (PG_RELEASED|PG_PAGEOUT)) { 1176 if (pg->flags & (PG_RELEASED|PG_PAGEOUT)) { 1177 wasclean = FALSE; 1178 } 1179 pg = TAILQ_NEXT(pg, listq); 1180 continue; 1181 } 1182 off = pg->offset; 1183 } else if (pg == NULL || pg->flags & (PG_RELEASED|PG_PAGEOUT)) { 1184 if (pg != NULL) { 1185 wasclean = FALSE; 1186 } 1187 off += PAGE_SIZE; 1188 if (off < endoff) { 1189 pg = uvm_pagelookup(uobj, off); 1190 } 1191 continue; 1192 } 1193 1194 /* 1195 * if the current page needs to be cleaned and it's busy, 1196 * wait for it to become unbusy. 1197 */ 1198 1199 yld = (l->l_cpu->ci_schedstate.spc_flags & 1200 SPCF_SHOULDYIELD) && !pagedaemon; 1201 if (pg->flags & PG_BUSY || yld) { 1202 UVMHIST_LOG(ubchist, "busy %p", pg,0,0,0); 1203 if (flags & PGO_BUSYFAIL && pg->flags & PG_BUSY) { 1204 UVMHIST_LOG(ubchist, "busyfail %p", pg, 0,0,0); 1205 error = EDEADLK; 1206 break; 1207 } 1208 KASSERT(!pagedaemon); 1209 if (by_list) { 1210 TAILQ_INSERT_BEFORE(pg, &curmp, listq); 1211 UVMHIST_LOG(ubchist, "curmp next %p", 1212 TAILQ_NEXT(&curmp, listq), 0,0,0); 1213 } 1214 if (yld) { 1215 simple_unlock(slock); 1216 preempt(1); 1217 simple_lock(slock); 1218 } else { 1219 pg->flags |= PG_WANTED; 1220 UVM_UNLOCK_AND_WAIT(pg, slock, 0, "genput", 0); 1221 simple_lock(slock); 1222 } 1223 if (by_list) { 1224 UVMHIST_LOG(ubchist, "after next %p", 1225 TAILQ_NEXT(&curmp, listq), 0,0,0); 1226 pg = TAILQ_NEXT(&curmp, listq); 1227 TAILQ_REMOVE(&uobj->memq, &curmp, listq); 1228 } else { 1229 pg = uvm_pagelookup(uobj, off); 1230 } 1231 continue; 1232 } 1233 1234 /* 1235 * if we're freeing, remove all mappings of the page now. 1236 * if we're cleaning, check if the page is needs to be cleaned. 1237 */ 1238 1239 if (flags & PGO_FREE) { 1240 pmap_page_protect(pg, VM_PROT_NONE); 1241 } else if (flags & PGO_CLEANIT) { 1242 1243 /* 1244 * if we still have some hope to pull this vnode off 1245 * from the syncer queue, write-protect the page. 1246 */ 1247 1248 if (cleanall && wasclean && 1249 gp->g_dirtygen == dirtygen) { 1250 1251 /* 1252 * uobj pages get wired only by uvm_fault 1253 * where uobj is locked. 1254 */ 1255 1256 if (pg->wire_count == 0) { 1257 pmap_page_protect(pg, 1258 VM_PROT_READ|VM_PROT_EXECUTE); 1259 } else { 1260 cleanall = FALSE; 1261 } 1262 } 1263 } 1264 1265 if (flags & PGO_CLEANIT) { 1266 needs_clean = pmap_clear_modify(pg) || 1267 (pg->flags & PG_CLEAN) == 0; 1268 pg->flags |= PG_CLEAN; 1269 } else { 1270 needs_clean = FALSE; 1271 } 1272 1273 /* 1274 * if we're cleaning, build a cluster. 1275 * the cluster will consist of pages which are currently dirty, 1276 * but they will be returned to us marked clean. 1277 * if not cleaning, just operate on the one page. 1278 */ 1279 1280 if (needs_clean) { 1281 KDASSERT((vp->v_flag & VONWORKLST)); 1282 wasclean = FALSE; 1283 memset(pgs, 0, sizeof(pgs)); 1284 pg->flags |= PG_BUSY; 1285 UVM_PAGE_OWN(pg, "genfs_putpages"); 1286 1287 /* 1288 * first look backward. 1289 */ 1290 1291 npages = MIN(maxpages >> 1, off >> PAGE_SHIFT); 1292 nback = npages; 1293 uvn_findpages(uobj, off - PAGE_SIZE, &nback, &pgs[0], 1294 UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY|UFP_BACKWARD); 1295 if (nback) { 1296 memmove(&pgs[0], &pgs[npages - nback], 1297 nback * sizeof(pgs[0])); 1298 if (npages - nback < nback) 1299 memset(&pgs[nback], 0, 1300 (npages - nback) * sizeof(pgs[0])); 1301 else 1302 memset(&pgs[npages - nback], 0, 1303 nback * sizeof(pgs[0])); 1304 } 1305 1306 /* 1307 * then plug in our page of interest. 1308 */ 1309 1310 pgs[nback] = pg; 1311 1312 /* 1313 * then look forward to fill in the remaining space in 1314 * the array of pages. 1315 */ 1316 1317 npages = maxpages - nback - 1; 1318 uvn_findpages(uobj, off + PAGE_SIZE, &npages, 1319 &pgs[nback + 1], 1320 UFP_NOWAIT|UFP_NOALLOC|UFP_DIRTYONLY); 1321 npages += nback + 1; 1322 } else { 1323 pgs[0] = pg; 1324 npages = 1; 1325 nback = 0; 1326 } 1327 1328 /* 1329 * apply FREE or DEACTIVATE options if requested. 1330 */ 1331 1332 if (flags & (PGO_DEACTIVATE|PGO_FREE)) { 1333 uvm_lock_pageq(); 1334 } 1335 for (i = 0; i < npages; i++) { 1336 tpg = pgs[i]; 1337 KASSERT(tpg->uobject == uobj); 1338 if (by_list && tpg == TAILQ_NEXT(pg, listq)) 1339 pg = tpg; 1340 if (tpg->offset < startoff || tpg->offset >= endoff) 1341 continue; 1342 if (flags & PGO_DEACTIVATE && tpg->wire_count == 0 1343 && tpg->loan_count == 0) { 1344 (void) pmap_clear_reference(tpg); 1345 uvm_pagedeactivate(tpg); 1346 } else if (flags & PGO_FREE) { 1347 pmap_page_protect(tpg, VM_PROT_NONE); 1348 if (tpg->flags & PG_BUSY) { 1349 tpg->flags |= freeflag; 1350 if (pagedaemon) { 1351 uvmexp.paging++; 1352 uvm_pagedequeue(tpg); 1353 } 1354 } else { 1355 1356 /* 1357 * ``page is not busy'' 1358 * implies that npages is 1 1359 * and needs_clean is false. 1360 */ 1361 1362 nextpg = TAILQ_NEXT(tpg, listq); 1363 uvm_pagefree(tpg); 1364 if (pagedaemon) 1365 uvmexp.pdfreed++; 1366 } 1367 } 1368 } 1369 if (flags & (PGO_DEACTIVATE|PGO_FREE)) { 1370 uvm_unlock_pageq(); 1371 } 1372 if (needs_clean) { 1373 modified = TRUE; 1374 1375 /* 1376 * start the i/o. if we're traversing by list, 1377 * keep our place in the list with a marker page. 1378 */ 1379 1380 if (by_list) { 1381 TAILQ_INSERT_AFTER(&uobj->memq, pg, &curmp, 1382 listq); 1383 } 1384 simple_unlock(slock); 1385 error = GOP_WRITE(vp, pgs, npages, flags); 1386 simple_lock(slock); 1387 if (by_list) { 1388 pg = TAILQ_NEXT(&curmp, listq); 1389 TAILQ_REMOVE(&uobj->memq, &curmp, listq); 1390 } 1391 if (error) { 1392 break; 1393 } 1394 if (by_list) { 1395 continue; 1396 } 1397 } 1398 1399 /* 1400 * find the next page and continue if there was no error. 1401 */ 1402 1403 if (by_list) { 1404 if (nextpg) { 1405 pg = nextpg; 1406 nextpg = NULL; 1407 } else { 1408 pg = TAILQ_NEXT(pg, listq); 1409 } 1410 } else { 1411 off += (npages - nback) << PAGE_SHIFT; 1412 if (off < endoff) { 1413 pg = uvm_pagelookup(uobj, off); 1414 } 1415 } 1416 } 1417 if (by_list) { 1418 TAILQ_REMOVE(&uobj->memq, &endmp, listq); 1419 PRELE(l); 1420 } 1421 1422 if (modified && (vp->v_flag & VWRITEMAPDIRTY) != 0 && 1423 (vp->v_type != VBLK || 1424 (vp->v_mount->mnt_flag & MNT_NODEVMTIME) == 0)) { 1425 GOP_MARKUPDATE(vp, GOP_UPDATE_MODIFIED); 1426 } 1427 1428 /* 1429 * if we're cleaning and there was nothing to clean, 1430 * take us off the syncer list. if we started any i/o 1431 * and we're doing sync i/o, wait for all writes to finish. 1432 */ 1433 1434 s = splbio(); 1435 if (cleanall && wasclean && gp->g_dirtygen == dirtygen && 1436 (vp->v_flag & VONWORKLST) != 0) { 1437 vp->v_flag &= ~VWRITEMAPDIRTY; 1438 if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 1439 vp->v_flag &= ~VONWORKLST; 1440 LIST_REMOVE(vp, v_synclist); 1441 } 1442 } 1443 splx(s); 1444 1445 #if !defined(DEBUG) 1446 skip_scan: 1447 #endif /* !defined(DEBUG) */ 1448 if (!wasclean && !async) { 1449 s = splbio(); 1450 /* 1451 * XXX - we want simple_unlock(&global_v_numoutput_slock); 1452 * but the slot in ltsleep() is taken! 1453 * XXX - try to recover from missed wakeups with a timeout.. 1454 * must think of something better. 1455 */ 1456 while (vp->v_numoutput != 0) { 1457 vp->v_flag |= VBWAIT; 1458 UVM_UNLOCK_AND_WAIT(&vp->v_numoutput, slock, FALSE, 1459 "genput2", hz); 1460 simple_lock(slock); 1461 } 1462 splx(s); 1463 } 1464 simple_unlock(&uobj->vmobjlock); 1465 return (error); 1466 } 1467 1468 int 1469 genfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags) 1470 { 1471 off_t off; 1472 vaddr_t kva; 1473 size_t len; 1474 int error; 1475 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1476 1477 UVMHIST_LOG(ubchist, "vp %p pgs %p npages %d flags 0x%x", 1478 vp, pgs, npages, flags); 1479 1480 off = pgs[0]->offset; 1481 kva = uvm_pagermapin(pgs, npages, 1482 UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK); 1483 len = npages << PAGE_SHIFT; 1484 1485 error = genfs_do_io(vp, off, kva, len, flags, UIO_WRITE, 1486 uvm_aio_biodone); 1487 1488 return error; 1489 } 1490 1491 /* 1492 * Backend routine for doing I/O to vnode pages. Pages are already locked 1493 * and mapped into kernel memory. Here we just look up the underlying 1494 * device block addresses and call the strategy routine. 1495 */ 1496 1497 static int 1498 genfs_do_io(struct vnode *vp, off_t off, vaddr_t kva, size_t len, int flags, 1499 enum uio_rw rw, void (*iodone)(struct buf *)) 1500 { 1501 int s, error, run; 1502 int fs_bshift, dev_bshift; 1503 off_t eof, offset, startoffset; 1504 size_t bytes, iobytes, skipbytes; 1505 daddr_t lbn, blkno; 1506 struct buf *mbp, *bp; 1507 struct vnode *devvp; 1508 boolean_t async = (flags & PGO_SYNCIO) == 0; 1509 boolean_t write = rw == UIO_WRITE; 1510 int brw = write ? B_WRITE : B_READ; 1511 UVMHIST_FUNC(__func__); UVMHIST_CALLED(ubchist); 1512 1513 UVMHIST_LOG(ubchist, "vp %p kva %p len 0x%x flags 0x%x", 1514 vp, kva, len, flags); 1515 1516 GOP_SIZE(vp, vp->v_size, &eof, 0); 1517 if (vp->v_type != VBLK) { 1518 fs_bshift = vp->v_mount->mnt_fs_bshift; 1519 dev_bshift = vp->v_mount->mnt_dev_bshift; 1520 } else { 1521 fs_bshift = DEV_BSHIFT; 1522 dev_bshift = DEV_BSHIFT; 1523 } 1524 error = 0; 1525 startoffset = off; 1526 bytes = MIN(len, eof - startoffset); 1527 skipbytes = 0; 1528 KASSERT(bytes != 0); 1529 1530 if (write) { 1531 s = splbio(); 1532 simple_lock(&global_v_numoutput_slock); 1533 vp->v_numoutput += 2; 1534 simple_unlock(&global_v_numoutput_slock); 1535 splx(s); 1536 } 1537 mbp = getiobuf(); 1538 UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x", 1539 vp, mbp, vp->v_numoutput, bytes); 1540 mbp->b_bufsize = len; 1541 mbp->b_data = (void *)kva; 1542 mbp->b_resid = mbp->b_bcount = bytes; 1543 mbp->b_flags = B_BUSY | brw | B_AGE | (async ? (B_CALL | B_ASYNC) : 0); 1544 mbp->b_iodone = iodone; 1545 mbp->b_vp = vp; 1546 if (curproc == uvm.pagedaemon_proc) 1547 BIO_SETPRIO(mbp, BPRIO_TIMELIMITED); 1548 else if (async) 1549 BIO_SETPRIO(mbp, BPRIO_TIMENONCRITICAL); 1550 else 1551 BIO_SETPRIO(mbp, BPRIO_TIMECRITICAL); 1552 1553 bp = NULL; 1554 for (offset = startoffset; 1555 bytes > 0; 1556 offset += iobytes, bytes -= iobytes) { 1557 lbn = offset >> fs_bshift; 1558 error = VOP_BMAP(vp, lbn, &devvp, &blkno, &run); 1559 if (error) { 1560 UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0); 1561 skipbytes += bytes; 1562 bytes = 0; 1563 break; 1564 } 1565 1566 iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset, 1567 bytes); 1568 if (blkno == (daddr_t)-1) { 1569 if (!write) { 1570 memset((char *)kva + (offset - startoffset), 0, 1571 iobytes); 1572 } 1573 skipbytes += iobytes; 1574 continue; 1575 } 1576 1577 /* if it's really one i/o, don't make a second buf */ 1578 if (offset == startoffset && iobytes == bytes) { 1579 bp = mbp; 1580 } else { 1581 UVMHIST_LOG(ubchist, "vp %p bp %p num now %d", 1582 vp, bp, vp->v_numoutput, 0); 1583 bp = getiobuf(); 1584 nestiobuf_setup(mbp, bp, offset - startoffset, iobytes); 1585 } 1586 bp->b_lblkno = 0; 1587 1588 /* adjust physical blkno for partial blocks */ 1589 bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >> 1590 dev_bshift); 1591 UVMHIST_LOG(ubchist, 1592 "vp %p offset 0x%x bcount 0x%x blkno 0x%x", 1593 vp, offset, bp->b_bcount, bp->b_blkno); 1594 1595 VOP_STRATEGY(devvp, bp); 1596 } 1597 if (skipbytes) { 1598 UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0); 1599 } 1600 nestiobuf_done(mbp, skipbytes, error); 1601 if (async) { 1602 UVMHIST_LOG(ubchist, "returning 0 (async)", 0,0,0,0); 1603 return (0); 1604 } 1605 UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0); 1606 error = biowait(mbp); 1607 s = splbio(); 1608 (*iodone)(mbp); 1609 splx(s); 1610 UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0); 1611 return (error); 1612 } 1613 1614 /* 1615 * VOP_PUTPAGES() for vnodes which never have pages. 1616 */ 1617 1618 int 1619 genfs_null_putpages(void *v) 1620 { 1621 struct vop_putpages_args /* { 1622 struct vnode *a_vp; 1623 voff_t a_offlo; 1624 voff_t a_offhi; 1625 int a_flags; 1626 } */ *ap = v; 1627 struct vnode *vp = ap->a_vp; 1628 1629 KASSERT(vp->v_uobj.uo_npages == 0); 1630 simple_unlock(&vp->v_interlock); 1631 return (0); 1632 } 1633 1634 void 1635 genfs_node_init(struct vnode *vp, const struct genfs_ops *ops) 1636 { 1637 struct genfs_node *gp = VTOG(vp); 1638 1639 lockinit(&gp->g_glock, PINOD, "glock", 0, 0); 1640 gp->g_op = ops; 1641 } 1642 1643 void 1644 genfs_size(struct vnode *vp, off_t size, off_t *eobp, int flags __unused) 1645 { 1646 int bsize; 1647 1648 bsize = 1 << vp->v_mount->mnt_fs_bshift; 1649 *eobp = (size + bsize - 1) & ~(bsize - 1); 1650 } 1651 1652 int 1653 genfs_compat_getpages(void *v) 1654 { 1655 struct vop_getpages_args /* { 1656 struct vnode *a_vp; 1657 voff_t a_offset; 1658 struct vm_page **a_m; 1659 int *a_count; 1660 int a_centeridx; 1661 vm_prot_t a_access_type; 1662 int a_advice; 1663 int a_flags; 1664 } */ *ap = v; 1665 1666 off_t origoffset; 1667 struct vnode *vp = ap->a_vp; 1668 struct uvm_object *uobj = &vp->v_uobj; 1669 struct vm_page *pg, **pgs; 1670 vaddr_t kva; 1671 int i, error, orignpages, npages; 1672 struct iovec iov; 1673 struct uio uio; 1674 kauth_cred_t cred = curlwp->l_cred; 1675 boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0; 1676 1677 error = 0; 1678 origoffset = ap->a_offset; 1679 orignpages = *ap->a_count; 1680 pgs = ap->a_m; 1681 1682 if (write && (vp->v_flag & VONWORKLST) == 0) { 1683 vn_syncer_add_to_worklist(vp, filedelay); 1684 } 1685 if (ap->a_flags & PGO_LOCKED) { 1686 uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m, 1687 UFP_NOWAIT|UFP_NOALLOC| (write ? UFP_NORDONLY : 0)); 1688 1689 return (ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0); 1690 } 1691 if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= vp->v_size) { 1692 simple_unlock(&uobj->vmobjlock); 1693 return (EINVAL); 1694 } 1695 if ((ap->a_flags & PGO_SYNCIO) == 0) { 1696 simple_unlock(&uobj->vmobjlock); 1697 return 0; 1698 } 1699 npages = orignpages; 1700 uvn_findpages(uobj, origoffset, &npages, pgs, UFP_ALL); 1701 simple_unlock(&uobj->vmobjlock); 1702 kva = uvm_pagermapin(pgs, npages, 1703 UVMPAGER_MAPIN_READ | UVMPAGER_MAPIN_WAITOK); 1704 for (i = 0; i < npages; i++) { 1705 pg = pgs[i]; 1706 if ((pg->flags & PG_FAKE) == 0) { 1707 continue; 1708 } 1709 iov.iov_base = (char *)kva + (i << PAGE_SHIFT); 1710 iov.iov_len = PAGE_SIZE; 1711 uio.uio_iov = &iov; 1712 uio.uio_iovcnt = 1; 1713 uio.uio_offset = origoffset + (i << PAGE_SHIFT); 1714 uio.uio_rw = UIO_READ; 1715 uio.uio_resid = PAGE_SIZE; 1716 UIO_SETUP_SYSSPACE(&uio); 1717 /* XXX vn_lock */ 1718 error = VOP_READ(vp, &uio, 0, cred); 1719 if (error) { 1720 break; 1721 } 1722 if (uio.uio_resid) { 1723 memset(iov.iov_base, 0, uio.uio_resid); 1724 } 1725 } 1726 uvm_pagermapout(kva, npages); 1727 simple_lock(&uobj->vmobjlock); 1728 uvm_lock_pageq(); 1729 for (i = 0; i < npages; i++) { 1730 pg = pgs[i]; 1731 if (error && (pg->flags & PG_FAKE) != 0) { 1732 pg->flags |= PG_RELEASED; 1733 } else { 1734 pmap_clear_modify(pg); 1735 uvm_pageactivate(pg); 1736 } 1737 } 1738 if (error) { 1739 uvm_page_unbusy(pgs, npages); 1740 } 1741 uvm_unlock_pageq(); 1742 simple_unlock(&uobj->vmobjlock); 1743 return (error); 1744 } 1745 1746 int 1747 genfs_compat_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, 1748 int flags __unused) 1749 { 1750 off_t offset; 1751 struct iovec iov; 1752 struct uio uio; 1753 kauth_cred_t cred = curlwp->l_cred; 1754 struct buf *bp; 1755 vaddr_t kva; 1756 int s, error; 1757 1758 offset = pgs[0]->offset; 1759 kva = uvm_pagermapin(pgs, npages, 1760 UVMPAGER_MAPIN_WRITE | UVMPAGER_MAPIN_WAITOK); 1761 1762 iov.iov_base = (void *)kva; 1763 iov.iov_len = npages << PAGE_SHIFT; 1764 uio.uio_iov = &iov; 1765 uio.uio_iovcnt = 1; 1766 uio.uio_offset = offset; 1767 uio.uio_rw = UIO_WRITE; 1768 uio.uio_resid = npages << PAGE_SHIFT; 1769 UIO_SETUP_SYSSPACE(&uio); 1770 /* XXX vn_lock */ 1771 error = VOP_WRITE(vp, &uio, 0, cred); 1772 1773 s = splbio(); 1774 V_INCR_NUMOUTPUT(vp); 1775 splx(s); 1776 1777 bp = getiobuf(); 1778 bp->b_flags = B_BUSY | B_WRITE | B_AGE; 1779 bp->b_vp = vp; 1780 bp->b_lblkno = offset >> vp->v_mount->mnt_fs_bshift; 1781 bp->b_data = (char *)kva; 1782 bp->b_bcount = npages << PAGE_SHIFT; 1783 bp->b_bufsize = npages << PAGE_SHIFT; 1784 bp->b_resid = 0; 1785 if (error) { 1786 bp->b_flags |= B_ERROR; 1787 bp->b_error = error; 1788 } 1789 uvm_aio_aiodone(bp); 1790 return (error); 1791 } 1792 1793 /* 1794 * Process a uio using direct I/O. If we reach a part of the request 1795 * which cannot be processed in this fashion for some reason, just return. 1796 * The caller must handle some additional part of the request using 1797 * buffered I/O before trying direct I/O again. 1798 */ 1799 1800 void 1801 genfs_directio(struct vnode *vp, struct uio *uio, int ioflag __unused) 1802 { 1803 struct vmspace *vs; 1804 struct iovec *iov; 1805 vaddr_t va; 1806 size_t len; 1807 const int mask = DEV_BSIZE - 1; 1808 int error; 1809 1810 /* 1811 * We only support direct I/O to user space for now. 1812 */ 1813 1814 if (VMSPACE_IS_KERNEL_P(uio->uio_vmspace)) { 1815 return; 1816 } 1817 1818 /* 1819 * If the vnode is mapped, we would need to get the getpages lock 1820 * to stabilize the bmap, but then we would get into trouble whil e 1821 * locking the pages if the pages belong to this same vnode (or a 1822 * multi-vnode cascade to the same effect). Just fall back to 1823 * buffered I/O if the vnode is mapped to avoid this mess. 1824 */ 1825 1826 if (vp->v_flag & VMAPPED) { 1827 return; 1828 } 1829 1830 /* 1831 * Do as much of the uio as possible with direct I/O. 1832 */ 1833 1834 vs = uio->uio_vmspace; 1835 while (uio->uio_resid) { 1836 iov = uio->uio_iov; 1837 if (iov->iov_len == 0) { 1838 uio->uio_iov++; 1839 uio->uio_iovcnt--; 1840 continue; 1841 } 1842 va = (vaddr_t)iov->iov_base; 1843 len = MIN(iov->iov_len, genfs_maxdio); 1844 len &= ~mask; 1845 1846 /* 1847 * If the next chunk is smaller than DEV_BSIZE or extends past 1848 * the current EOF, then fall back to buffered I/O. 1849 */ 1850 1851 if (len == 0 || uio->uio_offset + len > vp->v_size) { 1852 return; 1853 } 1854 1855 /* 1856 * Check alignment. The file offset must be at least 1857 * sector-aligned. The exact constraint on memory alignment 1858 * is very hardware-dependent, but requiring sector-aligned 1859 * addresses there too is safe. 1860 */ 1861 1862 if (uio->uio_offset & mask || va & mask) { 1863 return; 1864 } 1865 error = genfs_do_directio(vs, va, len, vp, uio->uio_offset, 1866 uio->uio_rw); 1867 if (error) { 1868 break; 1869 } 1870 iov->iov_base = (caddr_t)iov->iov_base + len; 1871 iov->iov_len -= len; 1872 uio->uio_offset += len; 1873 uio->uio_resid -= len; 1874 } 1875 } 1876 1877 /* 1878 * Iodone routine for direct I/O. We don't do much here since the request is 1879 * always synchronous, so the caller will do most of the work after biowait(). 1880 */ 1881 1882 static void 1883 genfs_dio_iodone(struct buf *bp) 1884 { 1885 int s; 1886 1887 KASSERT((bp->b_flags & B_ASYNC) == 0); 1888 s = splbio(); 1889 if ((bp->b_flags & (B_READ | B_AGE)) == B_AGE) { 1890 vwakeup(bp); 1891 } 1892 putiobuf(bp); 1893 splx(s); 1894 } 1895 1896 /* 1897 * Process one chunk of a direct I/O request. 1898 */ 1899 1900 static int 1901 genfs_do_directio(struct vmspace *vs, vaddr_t uva, size_t len, struct vnode *vp, 1902 off_t off, enum uio_rw rw) 1903 { 1904 struct vm_map *map; 1905 struct pmap *upm, *kpm; 1906 size_t klen = round_page(uva + len) - trunc_page(uva); 1907 off_t spoff, epoff; 1908 vaddr_t kva, puva; 1909 paddr_t pa; 1910 vm_prot_t prot; 1911 int error, rv, poff, koff; 1912 const int pgoflags = PGO_CLEANIT | PGO_SYNCIO | 1913 (rw == UIO_WRITE ? PGO_FREE : 0); 1914 1915 /* 1916 * For writes, verify that this range of the file already has fully 1917 * allocated backing store. If there are any holes, just punt and 1918 * make the caller take the buffered write path. 1919 */ 1920 1921 if (rw == UIO_WRITE) { 1922 daddr_t lbn, elbn, blkno; 1923 int bsize, bshift, run; 1924 1925 bshift = vp->v_mount->mnt_fs_bshift; 1926 bsize = 1 << bshift; 1927 lbn = off >> bshift; 1928 elbn = (off + len + bsize - 1) >> bshift; 1929 while (lbn < elbn) { 1930 error = VOP_BMAP(vp, lbn, NULL, &blkno, &run); 1931 if (error) { 1932 return error; 1933 } 1934 if (blkno == (daddr_t)-1) { 1935 return ENOSPC; 1936 } 1937 lbn += 1 + run; 1938 } 1939 } 1940 1941 /* 1942 * Flush any cached pages for parts of the file that we're about to 1943 * access. If we're writing, invalidate pages as well. 1944 */ 1945 1946 spoff = trunc_page(off); 1947 epoff = round_page(off + len); 1948 simple_lock(&vp->v_interlock); 1949 error = VOP_PUTPAGES(vp, spoff, epoff, pgoflags); 1950 if (error) { 1951 return error; 1952 } 1953 1954 /* 1955 * Wire the user pages and remap them into kernel memory. 1956 */ 1957 1958 prot = rw == UIO_READ ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ; 1959 error = uvm_vslock(vs, (void *)uva, len, prot); 1960 if (error) { 1961 return error; 1962 } 1963 1964 map = &vs->vm_map; 1965 upm = vm_map_pmap(map); 1966 kpm = vm_map_pmap(kernel_map); 1967 kva = uvm_km_alloc(kernel_map, klen, 0, 1968 UVM_KMF_VAONLY | UVM_KMF_WAITVA); 1969 puva = trunc_page(uva); 1970 for (poff = 0; poff < klen; poff += PAGE_SIZE) { 1971 rv = pmap_extract(upm, puva + poff, &pa); 1972 KASSERT(rv); 1973 pmap_enter(kpm, kva + poff, pa, prot, prot | PMAP_WIRED); 1974 } 1975 pmap_update(kpm); 1976 1977 /* 1978 * Do the I/O. 1979 */ 1980 1981 koff = uva - trunc_page(uva); 1982 error = genfs_do_io(vp, off, kva + koff, len, PGO_SYNCIO, rw, 1983 genfs_dio_iodone); 1984 1985 /* 1986 * Tear down the kernel mapping. 1987 */ 1988 1989 pmap_remove(kpm, kva, kva + klen); 1990 pmap_update(kpm); 1991 uvm_km_free(kernel_map, kva, klen, UVM_KMF_VAONLY); 1992 1993 /* 1994 * Unwire the user pages. 1995 */ 1996 1997 uvm_vsunlock(vs, (void *)uva, len); 1998 return error; 1999 } 2000 2001 2002 static void 2003 filt_genfsdetach(struct knote *kn) 2004 { 2005 struct vnode *vp = (struct vnode *)kn->kn_hook; 2006 2007 /* XXXLUKEM lock the struct? */ 2008 SLIST_REMOVE(&vp->v_klist, kn, knote, kn_selnext); 2009 } 2010 2011 static int 2012 filt_genfsread(struct knote *kn, long hint) 2013 { 2014 struct vnode *vp = (struct vnode *)kn->kn_hook; 2015 2016 /* 2017 * filesystem is gone, so set the EOF flag and schedule 2018 * the knote for deletion. 2019 */ 2020 if (hint == NOTE_REVOKE) { 2021 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 2022 return (1); 2023 } 2024 2025 /* XXXLUKEM lock the struct? */ 2026 kn->kn_data = vp->v_size - kn->kn_fp->f_offset; 2027 return (kn->kn_data != 0); 2028 } 2029 2030 static int 2031 filt_genfsvnode(struct knote *kn, long hint) 2032 { 2033 2034 if (kn->kn_sfflags & hint) 2035 kn->kn_fflags |= hint; 2036 if (hint == NOTE_REVOKE) { 2037 kn->kn_flags |= EV_EOF; 2038 return (1); 2039 } 2040 return (kn->kn_fflags != 0); 2041 } 2042 2043 static const struct filterops genfsread_filtops = 2044 { 1, NULL, filt_genfsdetach, filt_genfsread }; 2045 static const struct filterops genfsvnode_filtops = 2046 { 1, NULL, filt_genfsdetach, filt_genfsvnode }; 2047 2048 int 2049 genfs_kqfilter(void *v) 2050 { 2051 struct vop_kqfilter_args /* { 2052 struct vnode *a_vp; 2053 struct knote *a_kn; 2054 } */ *ap = v; 2055 struct vnode *vp; 2056 struct knote *kn; 2057 2058 vp = ap->a_vp; 2059 kn = ap->a_kn; 2060 switch (kn->kn_filter) { 2061 case EVFILT_READ: 2062 kn->kn_fop = &genfsread_filtops; 2063 break; 2064 case EVFILT_VNODE: 2065 kn->kn_fop = &genfsvnode_filtops; 2066 break; 2067 default: 2068 return (1); 2069 } 2070 2071 kn->kn_hook = vp; 2072 2073 /* XXXLUKEM lock the struct? */ 2074 SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext); 2075 2076 return (0); 2077 } 2078 2079 void 2080 genfs_node_wrlock(struct vnode *vp) 2081 { 2082 struct genfs_node *gp = VTOG(vp); 2083 2084 lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL); 2085 } 2086 2087 void 2088 genfs_node_rdlock(struct vnode *vp) 2089 { 2090 struct genfs_node *gp = VTOG(vp); 2091 2092 lockmgr(&gp->g_glock, LK_SHARED, NULL); 2093 } 2094 2095 void 2096 genfs_node_unlock(struct vnode *vp) 2097 { 2098 struct genfs_node *gp = VTOG(vp); 2099 2100 lockmgr(&gp->g_glock, LK_RELEASE, NULL); 2101 } 2102