/*	$NetBSD: nfs_bio.c,v 1.144 2006/06/30 09:55:34 yamt Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.144 2006/06/30 09:55:34 yamt Exp $");

#include "opt_nfs.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/dirent.h>
#include <sys/malloc.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern int nfs_commitsize;
extern struct nfsstats nfsstats;

static int nfs_doio_read __P((struct buf *, struct uio *));
static int nfs_doio_write __P((struct buf *, struct uio *));
static int nfs_doio_phys __P((struct buf *, struct uio *));

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred, cflag)
	struct vnode *vp;
	struct uio *uio;
	int ioflag, cflag;
	kauth_cred_t cred;
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp = NULL, *rabp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsdircache *ndp = NULL, *nndp = NULL;
	caddr_t baddr;
	int got_buf = 0, error = 0, n = 0, on = 0, en, enn;
	int enough = 0;
	struct dirent *dp, *pdp, *edp, *ep;
	off_t curoff = 0;
	int advice;
	struct lwp *l = curlwp;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (vp->v_type != VDIR && uio->uio_offset < 0)
		return (EINVAL);
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, l);
#endif
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);

	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date.  If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */

	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		error = nfs_flushstalebuf(vp, cred, l,
		    NFS_FLUSHSTALEBUF_MYWRITE);
		if (error)
			return error;
	}

	do {
#ifndef NFS_V2_ONLY
		/*
		 * Get a valid lease.  If cached data is stale, flush it.
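		 * (Applies only to NQNFS mounts: nqnfs_getlease() is retried
		 * while the lease is expired, and the cached data is thrown
		 * away when the server's modification revision (n_lrev) no
		 * longer matches the revision the buffers were read under
		 * (n_brev).)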
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, ND_READ)) {
				do {
					error = nqnfs_getlease(vp, ND_READ,
					    cred, l);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) &&
				     vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						nfs_invaldircache(vp, 0);
					}
					error = nfs_vinvalbuf(vp, V_SAVE,
					    cred, l, 1);
					if (error)
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR &&
			    (np->n_flag & NMODIFIED)) {
				nfs_invaldircache(vp, 0);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
				if (error)
					return (error);
			}
		}
#endif
		/*
		 * Don't cache symlinks.
		 */
		if (np->n_flag & NQNFSNONCACHE
		    || ((vp->v_flag & VROOT) && vp->v_type == VLNK)) {
			switch (vp->v_type) {
			case VREG:
				return (nfs_readrpc(vp, uio));
			case VLNK:
				return (nfs_readlinkrpc(vp, uio, cred));
			case VDIR:
				break;
			default:
				printf(" NQNFSNONCACHE: type %x unexpected\n",
				    vp->v_type);
			}
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;

			advice = IO_ADV_DECODE(ioflag);
			error = 0;
			while (uio->uio_resid > 0) {
				void *win;
				int flags;
				vsize_t bytelen;

				nfs_delayedtruncate(vp);
				if (np->n_size <= uio->uio_offset) {
					break;
				}
				bytelen = MIN(np->n_size - uio->uio_offset,
				    uio->uio_resid);
				win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
				    &bytelen, advice, UBC_READ);
				error = uiomove(win, bytelen, uio);
				flags = UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
				ubc_release(win, flags);
				if (error) {
					/*
					 * XXXkludge
					 * the file has been truncated on the
					 * server.  there isn't much we can do.
					 */
					if (uio->uio_offset >= np->n_size) {
						/* end of file */
						error = 0;
					} else {
						break;
					}
				}
			}
			break;

		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, l);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
diragain:
			nfsstats.biocache_readdirs++;
			ndp = nfs_searchdircache(vp, uio->uio_offset,
			    (nmp->nm_flag & NFSMNT_XLATECOOKIE), 0);
			if (!ndp) {
				/*
				 * We've been handed a cookie that is not
				 * in the cache.  If we're not translating
				 * 32 <-> 64, it may be a value that was
				 * flushed out of the cache because it grew
				 * too big.  Let the server judge if it's
				 * valid or not.  In the translation case,
				 * we have no way of validating this value,
				 * so punt.
				 */
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
					return (EINVAL);
				ndp = nfs_enterdircache(vp, uio->uio_offset,
				    uio->uio_offset, 0, 0);
			}

			if (NFS_EOFVALID(np) &&
			    ndp->dc_cookie == np->n_direofoffset) {
				nfs_putdircache(np, ndp);
				nfsstats.direofcache_hits++;
				return (0);
			}

			bp = nfs_getcacheblk(vp, NFSDC_BLKNO(ndp),
			    NFS_DIRBLKSIZ, l);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				bp->b_dcookie = ndp->dc_blkcookie;
				error = nfs_doio(bp);
				if (error) {
					/*
					 * Yuck!  The directory has been
					 * modified on the server.
					 * Punt and let the userland code
					 * deal with it.
					 */
					nfs_putdircache(np, ndp);
					brelse(bp);
					/*
					 * nfs_request maps NFSERR_BAD_COOKIE
					 * to EINVAL.
					 */
					if (error == EINVAL) {
						/* NFSERR_BAD_COOKIE */
						nfs_invaldircache(vp, 0);
						nfs_vinvalbuf(vp, 0, cred, l, 1);
					}
					return (error);
				}
			}

			/*
			 * Just return if we hit EOF right away with this
			 * block.  Always check here, because direofoffset
			 * may have been set by an nfsiod since the last
			 * check.
			 *
			 * also, empty block implies EOF.
			 */

			if (bp->b_bcount == bp->b_resid ||
			    (NFS_EOFVALID(np) &&
			    ndp->dc_blkcookie == np->n_direofoffset)) {
				KASSERT(bp->b_bcount != bp->b_resid ||
				    ndp->dc_blkcookie == bp->b_dcookie);
				nfs_putdircache(np, ndp);
				bp->b_flags |= B_NOCACHE;
				brelse(bp);
				return 0;
			}

			/*
			 * Find the entry we were looking for in the block.
			 */

			en = ndp->dc_entry;

			pdp = dp = (struct dirent *)bp->b_data;
			edp = (struct dirent *)(void *)(bp->b_data +
			    bp->b_bcount - bp->b_resid);
			enn = 0;
			while (enn < en && dp < edp) {
				pdp = dp;
				dp = _DIRENT_NEXT(dp);
				enn++;
			}

			/*
			 * If the entry number was bigger than the number of
			 * entries in the block, or the cookie of the previous
			 * entry doesn't match, the directory cache is
			 * stale.  Flush it and try again (i.e. go to
			 * the server).
			 */
			if (dp >= edp ||
			    (struct dirent *)_DIRENT_NEXT(dp) > edp ||
			    (en > 0 && NFS_GETCOOKIE(pdp) != ndp->dc_cookie)) {
#ifdef DEBUG
				printf("invalid cache: %p %p %p off %lx %lx\n",
				    pdp, dp, edp,
				    (unsigned long)uio->uio_offset,
				    (unsigned long)NFS_GETCOOKIE(pdp));
#endif
				nfs_putdircache(np, ndp);
				brelse(bp);
				nfs_invaldircache(vp, 0);
				nfs_vinvalbuf(vp, 0, cred, l, 0);
				goto diragain;
			}

			on = (caddr_t)dp - bp->b_data;

			/*
			 * Cache all entries that may be exported to the
			 * user, as they may be thrown back at us.  The
			 * NFSBIO_CACHECOOKIES flag indicates that all
			 * entries are being 'exported', so cache them all.
			 */

			if (en == 0 && pdp == dp) {
				dp = _DIRENT_NEXT(dp);
				enn++;
			}

			if (uio->uio_resid < (bp->b_bcount - bp->b_resid - on)) {
				n = uio->uio_resid;
				enough = 1;
			} else
				n = bp->b_bcount - bp->b_resid - on;

			ep = (struct dirent *)(void *)(bp->b_data + on + n);

			/*
			 * Find last complete entry to copy, caching entries
			 * (if requested) as we go.
			 */

			while (dp < ep &&
			    (struct dirent *)_DIRENT_NEXT(dp) <= ep) {
				if (cflag & NFSBIO_CACHECOOKIES) {
					nndp = nfs_enterdircache(vp,
					    NFS_GETCOOKIE(pdp),
					    ndp->dc_blkcookie, enn,
					    bp->b_lblkno);
					if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
						NFS_STASHCOOKIE32(pdp,
						    nndp->dc_cookie32);
					}
					nfs_putdircache(np, nndp);
				}
				pdp = dp;
				dp = _DIRENT_NEXT(dp);
				enn++;
			}
			nfs_putdircache(np, ndp);

			/*
			 * If the last requested entry was not the last in the
			 * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),
			 * cache the cookie of the last requested one, and
			 * set the offset to it.
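			 * (With NFSMNT_XLATECOOKIE the 64-bit server cookie
			 * is not returned directly; the 32-bit cookie
			 * allocated by nfs_enterdircache() is stashed in the
			 * entry and used as the uio offset instead.)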
			 */

			if ((on + n) < bp->b_bcount - bp->b_resid) {
				curoff = NFS_GETCOOKIE(pdp);
				nndp = nfs_enterdircache(vp, curoff,
				    ndp->dc_blkcookie, enn, bp->b_lblkno);
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
					NFS_STASHCOOKIE32(pdp,
					    nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
				nfs_putdircache(np, nndp);
			} else
				curoff = bp->b_dcookie;

			/*
			 * Always cache the entry for the next block,
			 * so that readaheads can use it.
			 */
			nndp = nfs_enterdircache(vp, bp->b_dcookie,
			    bp->b_dcookie, 0, 0);
			if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
				if (curoff == bp->b_dcookie) {
					NFS_STASHCOOKIE32(pdp,
					    nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
			}

			n = (char *)_DIRENT_NEXT(pdp) - (bp->b_data + on);

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you
			 * have the directory offset cookie of the next
			 * block.)
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    !NFS_EOFVALID(np) &&
			    !(np->n_flag & NQNFSNONCACHE)) {
				rabp = nfs_getcacheblk(vp, NFSDC_BLKNO(nndp),
				    NFS_DIRBLKSIZ, l);
				if (rabp) {
					if ((rabp->b_flags &
					    (B_DONE | B_DELWRI)) == 0) {
						rabp->b_dcookie =
						    nndp->dc_cookie;
						rabp->b_flags |=
						    (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp)) {
							rabp->b_flags |=
							    B_INVAL;
							brelse(rabp);
						}
					} else
						brelse(rabp);
				}
			}
			nfs_putdircache(np, nndp);
			got_buf = 1;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n",
			    vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			if (np->n_flag & NQNFSNONCACHE)
				bp->b_flags |= B_INVAL;
			uio->uio_offset = curoff;
			if (enough)
				n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n",
			    vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		kauth_cred_t a_cred;
	} */ *ap = v;
	struct uio *uio = ap->a_uio;
	struct lwp *l = curlwp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	kauth_cred_t cred = ap->a_cred;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	void *win;
	voff_t oldoff, origoff;
	vsize_t bytelen;
	int flags, error = 0;
	int ioflag = ap->a_ioflag;
	int extended = 0, wrotedata = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, l);
#endif
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = VOP_GETATTR(vp, &vattr, cred, l);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (l && l->l_proc && uio->uio_offset + uio->uio_resid >
	    l->l_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(l->l_proc, SIGXFSZ);
		return (EFBIG);
	}

	if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
		int iomode = NFSV3WRITE_FILESYNC;
		boolean_t stalewriteverf = FALSE;

		lockmgr(&nmp->nm_writeverflock, LK_SHARED, NULL);
		error = nfs_writerpc(vp, uio, &iomode, FALSE, &stalewriteverf);
		lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
		if (stalewriteverf)
			nfs_clearcommit(vp->v_mount);
		return (error);
	}

	origoff = uio->uio_offset;
	do {
		boolean_t extending; /* if we are extending whole pages */
		u_quad_t oldsize;
		oldoff = uio->uio_offset;
		bytelen = uio->uio_resid;

#ifndef NFS_V2_ONLY
		/*
		 * Check for a valid write lease.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, l);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
#endif
		nfsstats.biocache_writes++;

		oldsize = np->n_size;
		np->n_flag |= NMODIFIED;
		if (np->n_size < uio->uio_offset + bytelen) {
			np->n_size = uio->uio_offset + bytelen;
		}
		extending = ((uio->uio_offset & PAGE_MASK) == 0 &&
		    (bytelen & PAGE_MASK) == 0 &&
		    uio->uio_offset >= vp->v_size);
		win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
		    UVM_ADV_NORMAL,
		    UBC_WRITE | (extending ? UBC_FAULTBUSY : 0));
		error = uiomove(win, bytelen, uio);
		flags = UBC_WANT_UNMAP(vp) ? UBC_UNMAP : 0;
		ubc_release(win, flags);
		if (error) {
			if (extending) {
				/*
				 * back out the size and free pages past eof.
				 */
				np->n_size = oldsize;
				simple_lock(&vp->v_interlock);
				(void)VOP_PUTPAGES(vp, round_page(vp->v_size),
				    0, PGO_SYNCIO | PGO_FREE);
			}
			break;
		}
		wrotedata = 1;

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 */

		if (vp->v_size < uio->uio_offset) {
			uvm_vnp_setsize(vp, uio->uio_offset);
			extended = 1;
		}

		if ((oldoff & ~(nmp->nm_wsize - 1)) !=
		    (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
			simple_lock(&vp->v_interlock);
			error = VOP_PUTPAGES(vp,
			    trunc_page(oldoff & ~(nmp->nm_wsize - 1)),
			    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
			    ~(nmp->nm_wsize - 1)), PGO_CLEANIT);
		}
	} while (uio->uio_resid > 0);
	if (wrotedata)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
		simple_lock(&vp->v_interlock);
		error = VOP_PUTPAGES(vp,
		    trunc_page(origoff & ~(nmp->nm_wsize - 1)),
		    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
		    ~(nmp->nm_wsize - 1)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	return error;
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy.
 * If the calling process is interrupted by a signal for an interruptible
 * mount point, return NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, l)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct lwp *l;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, l))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers.  If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, l, intrflg)
	struct vnode *vp;
	int flags;
	kauth_cred_t cred;
	struct lwp *l;
	int intrflg;
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	simple_lock(&vp->v_interlock);
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = ltsleep(&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo, &vp->v_interlock);
		if (error && intrflg && nfs_sigintr(nmp, NULL, l)) {
			simple_unlock(&vp->v_interlock);
			return EINTR;
		}
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	simple_unlock(&vp->v_interlock);
	error = vinvalbuf(vp, flags, cred, l, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, NULL, l)) {
			error = EINTR;
			break;
		}
		error = vinvalbuf(vp, flags, cred, l, 0, slptimeo);
	}
	simple_lock(&vp->v_interlock);
	if (error == 0)
		np->n_flag &= ~NMODIFIED;
	np->n_flag &= ~NFLUSHINPROG;
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	simple_unlock(&vp->v_interlock);
	return error;
}

/*
 * nfs_flushstalebuf: flush cache if it's stale.
 *
 * => caller shouldn't own any pages or buffers which belong to the vnode.
 */

int
nfs_flushstalebuf(struct vnode *vp, kauth_cred_t cred, struct lwp *l,
    int flags)
{
	struct nfsnode *np = VTONFS(vp);
	struct vattr vattr;
	int error;

	if (np->n_flag & NMODIFIED) {
		if ((flags & NFS_FLUSHSTALEBUF_MYWRITE) == 0
		    || vp->v_type != VREG) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
			if (error)
				return error;
			if (vp->v_type == VDIR) {
				nfs_invaldircache(vp, 0);
			}
		} else {
			/*
			 * XXX assuming writes are ours.
			 */
		}
		NFS_INVALIDATE_ATTRCACHE(np);
		error = VOP_GETATTR(vp, &vattr, cred, l);
		if (error)
			return error;
		np->n_mtime = vattr.va_mtime;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, l);
		if (error)
			return error;
		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
			if (vp->v_type == VDIR) {
				nfs_invaldircache(vp, 0);
			}
			error = nfs_vinvalbuf(vp, V_SAVE, cred, l, 1);
			if (error)
				return error;
			np->n_mtime = vattr.va_mtime;
		}
	}

	return error;
}

/*
 * Initiate asynchronous I/O.  Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */

int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i;
	struct nfsmount *nmp;
	int gotiod, slpflag = 0, slptimeo = 0, error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */

	for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
		struct nfs_iod *iod = &nfs_asyncdaemon[i];

		simple_lock(&iod->nid_slock);
		if (iod->nid_want) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			iod->nid_want = NULL;
			iod->nid_mount = nmp;
			wakeup(&iod->nid_want);
			simple_lock(&nmp->nm_slock);
			simple_unlock(&iod->nid_slock);
			nmp->nm_bufqiods++;
			gotiod = TRUE;
			break;
		}
		simple_unlock(&iod->nid_slock);
	}

	/*
	 * If none are free, we may already have an iod working on this mount
	 * point.  If so, it will process our request.
	 */

	if (!gotiod) {
		simple_lock(&nmp->nm_slock);
		if (nmp->nm_bufqiods > 0)
			gotiod = TRUE;
	}

	LOCK_ASSERT(simple_lock_held(&nmp->nm_slock));

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.  However, even if we have an iod, do not initiate
	 * queue cleaning if curproc is the pageout daemon.  If the NFS mount
	 * is via local loopback, we may put curproc (pagedaemon) to sleep
	 * waiting for the writes to complete.  But the server (ourself)
	 * may block the write, waiting for its (i.e., our) pagedaemon
	 * to produce clean pages to handle the write: deadlock.
	 * XXX: start non-loopback mounts straight away?  If "lots free",
	 * let pagedaemon start loopback writes anyway?
	 */
	if (gotiod) {

		/*
		 * Ensure that the queue never grows too large.
		 */
		if (curproc == uvm.pagedaemon_proc) {
			/* Enqueue for later, to avoid free-page deadlock */
			(void) 0;
		} else while (nmp->nm_bufqlen >= 2 * nfs_numasync) {
			nmp->nm_bufqwant = TRUE;
			error = ltsleep(&nmp->nm_bufq,
			    slpflag | PRIBIO | PNORELOCK,
			    "nfsaio", slptimeo, &nmp->nm_slock);
			if (error) {
				if (nfs_sigintr(nmp, NULL, curlwp))
					return (EINTR);
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}

			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */

			if (nmp->nm_bufqiods == 0)
				goto again;

			simple_lock(&nmp->nm_slock);
		}
		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		simple_unlock(&nmp->nm_slock);
		return (0);
	}
	simple_unlock(&nmp->nm_slock);

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */

	return (EIO);
}

/*
 * nfs_doio for read.
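 *
 * Performs the actual READ, READLINK or READDIR(PLUS) RPC for a cache
 * block.  For regular files a short read means there is a hole in the file
 * (or the file was truncated on the server); the remainder of the buffer
 * is zero-filled instead of being returned as stale data.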
 */
static int
nfs_doio_read(bp, uiop)
	struct buf *bp;
	struct uio *uiop;
{
	struct vnode *vp = bp->b_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0;

	uiop->uio_rw = UIO_READ;
	switch (vp->v_type) {
	case VREG:
		nfsstats.read_bios++;
		error = nfs_readrpc(vp, uiop);
		if (!error && uiop->uio_resid) {
			int diff, len;

			/*
			 * If uio_resid > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet or the file has been truncated
			 * on the server.
			 * Just zero fill the rest of the valid area.
			 */

			KASSERT(vp->v_size >=
			    uiop->uio_offset + uiop->uio_resid);
			diff = bp->b_bcount - uiop->uio_resid;
			len = uiop->uio_resid;
			memset((char *)bp->b_data + diff, 0, len);
			uiop->uio_resid = 0;
		}
#if 0
		if (uiop->uio_lwp && (vp->v_flag & VTEXT) &&
		    (((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_READ) &&
		    np->n_lrev != np->n_brev) ||
		    (!(nmp->nm_flag & NFSMNT_NQNFS) &&
		    timespeccmp(&np->n_mtime, &np->n_vattr->va_mtime, !=)))) {
			killproc(uiop->uio_lwp->l_proc,
			    "process text file was modified");
#if 0 /* XXX NJWLWP */
			uiop->uio_lwp->l_proc->p_holdcnt++;
#endif
		}
#endif
		break;
	case VLNK:
		KASSERT(uiop->uio_offset == (off_t)0);
		nfsstats.readlink_bios++;
		error = nfs_readlinkrpc(vp, uiop, np->n_rcred);
		break;
	case VDIR:
		nfsstats.readdir_bios++;
		uiop->uio_offset = bp->b_dcookie;
#ifndef NFS_V2_ONLY
		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
			error = nfs_readdirplusrpc(vp, uiop,
			    curlwp->l_proc->p_cred);
			/*
			 * nfs_request maps NFSERR_NOTSUPP to ENOTSUP.
			 */
			if (error == ENOTSUP)
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
		}
#else
		nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
#endif
		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
			error = nfs_readdirrpc(vp, uiop,
			    curlwp->l_proc->p_cred);
		if (!error) {
			bp->b_dcookie = uiop->uio_offset;
		}
		break;
	default:
		printf("nfs_doio: type %x unexpected\n", vp->v_type);
		break;
	}
	if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	}
	return error;
}

/*
 * nfs_doio for write.
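 *
 * Outline of the NFSv3 path: asynchronous buffers are written with
 * NFSV3WRITE_UNSTABLE and their pages are marked PG_NEEDCOMMIT | PG_RDONLY,
 * so that a later pass can make them stable with a single COMMIT RPC.
 * If every page in the buffer is already marked PG_NEEDCOMMIT, only the
 * COMMIT is sent.  A stale write verifier from the server invalidates the
 * commit state (nfs_clearcommit()) so the data will be written again.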
 */
static int
nfs_doio_write(bp, uiop)
	struct buf *bp;
	struct uio *uiop;
{
	struct vnode *vp = bp->b_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int iomode;
	boolean_t stalewriteverf = FALSE;
	int i, npages = (bp->b_bcount + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct vm_page *pgs[npages];
#ifndef NFS_V2_ONLY
	boolean_t needcommit = TRUE; /* need only COMMIT RPC */
#else
	boolean_t needcommit = FALSE; /* need only COMMIT RPC */
#endif
	boolean_t pageprotected;
	struct uvm_object *uobj = &vp->v_uobj;
	int error;
	off_t off, cnt;

	if ((bp->b_flags & B_ASYNC) != 0 && NFS_ISV3(vp)) {
		iomode = NFSV3WRITE_UNSTABLE;
	} else {
		iomode = NFSV3WRITE_FILESYNC;
	}

#ifndef NFS_V2_ONLY
again:
#endif
	lockmgr(&nmp->nm_writeverflock, LK_SHARED, NULL);

	for (i = 0; i < npages; i++) {
		pgs[i] = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
		if (pgs[i]->uobject == uobj &&
		    pgs[i]->offset == uiop->uio_offset + (i << PAGE_SHIFT)) {
			KASSERT(pgs[i]->flags & PG_BUSY);
			/*
			 * this page belongs to our object.
			 */
			simple_lock(&uobj->vmobjlock);
			/*
			 * write the page out stably if it's about to be
			 * released, because we can't resend it if the
			 * server crashes.
			 *
			 * XXX assuming PG_RELEASED|PG_PAGEOUT won't be
			 * changed until we unbusy the page.
			 */
			if (pgs[i]->flags & (PG_RELEASED|PG_PAGEOUT))
				iomode = NFSV3WRITE_FILESYNC;
			/*
			 * if we find a page which hasn't been sent yet,
			 * we need to do a WRITE RPC.
			 */
			if ((pgs[i]->flags & PG_NEEDCOMMIT) == 0)
				needcommit = FALSE;
			simple_unlock(&uobj->vmobjlock);
		} else {
			iomode = NFSV3WRITE_FILESYNC;
			needcommit = FALSE;
		}
	}
	if (!needcommit && iomode == NFSV3WRITE_UNSTABLE) {
		simple_lock(&uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags |= PG_NEEDCOMMIT | PG_RDONLY;
			pmap_page_protect(pgs[i], VM_PROT_READ);
		}
		simple_unlock(&uobj->vmobjlock);
		pageprotected = TRUE; /* pages can't be modified during i/o. */
	} else
		pageprotected = FALSE;

	/*
	 * Send the data to the server if necessary,
	 * otherwise just send a commit rpc.
	 */
#ifndef NFS_V2_ONLY
	if (needcommit) {

		/*
		 * If the buffer is in the range that we already committed,
		 * there's nothing to do.
		 *
		 * If it's in the range that we need to commit, push the
		 * whole range at once, otherwise only push the buffer.
		 * In both these cases, acquire the commit lock to avoid
		 * other processes modifying the range.
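		 * (The to-be-committed range is tracked by np->n_pushlo and
		 * np->n_pushhi; if this buffer falls inside it, the whole
		 * range is pushed with one COMMIT and then merged into the
		 * committed range via nfs_merge_commit_ranges().)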
		 */

		off = uiop->uio_offset;
		cnt = bp->b_bcount;
		lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
		if (!nfs_in_committed_range(vp, off, bp->b_bcount)) {
			boolean_t pushedrange;
			if (nfs_in_tobecommitted_range(vp, off, bp->b_bcount)) {
				pushedrange = TRUE;
				off = np->n_pushlo;
				cnt = np->n_pushhi - np->n_pushlo;
			} else {
				pushedrange = FALSE;
			}
			error = nfs_commit(vp, off, cnt, curlwp);
			if (error == 0) {
				if (pushedrange) {
					nfs_merge_commit_ranges(vp);
				} else {
					nfs_add_committed_range(vp, off, cnt);
				}
			}
		} else {
			error = 0;
		}
		lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
		lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
		if (!error) {
			/*
			 * pages are now on stable storage.
			 */
			uiop->uio_resid = 0;
			simple_lock(&uobj->vmobjlock);
			for (i = 0; i < npages; i++) {
				pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
			}
			simple_unlock(&uobj->vmobjlock);
			return 0;
		} else if (error == NFSERR_STALEWRITEVERF) {
			nfs_clearcommit(vp->v_mount);
			goto again;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = np->n_error = error;
			np->n_flag |= NWRITEERR;
		}
		return error;
	}
#endif
	off = uiop->uio_offset;
	cnt = bp->b_bcount;
	uiop->uio_rw = UIO_WRITE;
	nfsstats.write_bios++;
	error = nfs_writerpc(vp, uiop, &iomode, pageprotected, &stalewriteverf);
#ifndef NFS_V2_ONLY
	if (!error && iomode == NFSV3WRITE_UNSTABLE) {
		/*
		 * we need to commit pages later.
		 */
		lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
		nfs_add_tobecommitted_range(vp, off, cnt);
		/*
		 * if there are too many uncommitted pages, commit some
		 * of them now.
		 */
		if (np->n_pushhi - np->n_pushlo > nfs_commitsize) {
			off = np->n_pushlo;
			cnt = nfs_commitsize >> 1;
			error = nfs_commit(vp, off, cnt, curlwp);
			if (!error) {
				nfs_add_committed_range(vp, off, cnt);
				nfs_del_tobecommitted_range(vp, off, cnt);
			}
			if (error == NFSERR_STALEWRITEVERF) {
				stalewriteverf = TRUE;
				error = 0; /* it isn't a real error */
			}
		} else {
			/*
			 * re-dirty pages so that they will be passed
			 * to us later again.
			 */
			simple_lock(&uobj->vmobjlock);
			for (i = 0; i < npages; i++) {
				pgs[i]->flags &= ~PG_CLEAN;
			}
			simple_unlock(&uobj->vmobjlock);
		}
		lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	} else
#endif
	if (!error) {
		/*
		 * pages are now on stable storage.
		 */
		lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
		nfs_del_committed_range(vp, off, cnt);
		lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
		simple_lock(&uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
		}
		simple_unlock(&uobj->vmobjlock);
	} else {
		/*
		 * we got an error.
		 */
		bp->b_flags |= B_ERROR;
		bp->b_error = np->n_error = error;
		np->n_flag |= NWRITEERR;
	}

	lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);

	if (stalewriteverf) {
		nfs_clearcommit(vp->v_mount);
	}
	return error;
}

/*
 * nfs_doio for B_PHYS.
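 *
 * Handles raw (B_PHYS) transfers that bypass the page cache, e.g. reads of
 * /dev/drum when swapping to an NFS file (see nfs_doio() below).  Writes use
 * NFSV3WRITE_DATASYNC, so no separate COMMIT is required for the data.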
 */
static int
nfs_doio_phys(bp, uiop)
	struct buf *bp;
	struct uio *uiop;
{
	struct vnode *vp = bp->b_vp;
	int error;

	uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
	if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop);
	} else {
		int iomode = NFSV3WRITE_DATASYNC;
		boolean_t stalewriteverf;
		struct nfsmount *nmp = VFSTONFS(vp->v_mount);

		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		lockmgr(&nmp->nm_writeverflock, LK_SHARED, NULL);
		error = nfs_writerpc(vp, uiop, &iomode, FALSE, &stalewriteverf);
		lockmgr(&nmp->nm_writeverflock, LK_RELEASE, NULL);
		if (stalewriteverf) {
			nfs_clearcommit(bp->b_vp->v_mount);
		}
	}
	if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	}
	return error;
}

/*
 * Do an I/O operation to/from a cache block.  This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp)
	struct buf *bp;
{
	int error;
	struct uio uio;
	struct uio *uiop = &uio;
	struct iovec io;
	UVMHIST_FUNC("nfs_doio"); UVMHIST_CALLED(ubchist);

	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_offset = (((off_t)bp->b_blkno) << DEV_BSHIFT);
	UIO_SETUP_SYSSPACE(uiop);
	io.iov_base = bp->b_data;
	io.iov_len = uiop->uio_resid = bp->b_bcount;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		error = nfs_doio_phys(bp, uiop);
	} else if (bp->b_flags & B_READ) {
		error = nfs_doio_read(bp, uiop);
	} else {
		error = nfs_doio_write(bp, uiop);
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}

/*
 * Vnode op for VM getpages.
 */

int
nfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uobj;
	struct nfsnode *np = VTONFS(vp);
	const int npages = *ap->a_count;
	struct vm_page *pg, **pgs, *opgs[npages];
	off_t origoffset, len;
	int i, error;
	boolean_t v3 = NFS_ISV3(vp);
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	boolean_t locked = (ap->a_flags & PGO_LOCKED) != 0;

	/*
	 * call the genfs code to get the pages.  `pgs' may be NULL
	 * when doing read-ahead.
	 */

	pgs = ap->a_m;
	if (write && locked && v3) {
		KASSERT(pgs != NULL);
#ifdef DEBUG

		/*
		 * If PGO_LOCKED is set, real pages shouldn't exist
		 * in the array.
		 */

		for (i = 0; i < npages; i++)
			KDASSERT(pgs[i] == NULL || pgs[i] == PGO_DONTCARE);
#endif
		memcpy(opgs, pgs, npages * sizeof(struct vm_page *));
	}
	error = genfs_getpages(v);
	if (error) {
		return (error);
	}

	/*
	 * for read faults where the nfs node is not yet marked NMODIFIED,
	 * set PG_RDONLY on the pages so that we come back here if someone
	 * tries to modify later via the mapping that will be entered for
	 * this fault.
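	 * (The write fault then comes back through the code below, which
	 * clears PG_RDONLY, marks the node NMODIFIED and removes the pages
	 * from the committed/to-be-committed ranges so they will be written
	 * and committed again.)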
	 */

	if (!write && (np->n_flag & NMODIFIED) == 0 && pgs != NULL) {
		if (!locked) {
			simple_lock(&uobj->vmobjlock);
		}
		for (i = 0; i < npages; i++) {
			pg = pgs[i];
			if (pg == NULL || pg == PGO_DONTCARE) {
				continue;
			}
			pg->flags |= PG_RDONLY;
		}
		if (!locked) {
			simple_unlock(&uobj->vmobjlock);
		}
	}
	if (!write) {
		return (0);
	}

	/*
	 * this is a write fault; update the commit info.
	 */

	origoffset = ap->a_offset;
	len = npages << PAGE_SHIFT;

	if (v3) {
		error = lockmgr(&np->n_commitlock,
		    LK_EXCLUSIVE | (locked ? LK_NOWAIT : 0), NULL);
		if (error) {
			KASSERT(locked != 0);

			/*
			 * Since PGO_LOCKED is set, we need to unbusy
			 * all pages fetched by genfs_getpages() above,
			 * tell the caller that there are no pages
			 * available and put back original pgs array.
			 */

			uvm_lock_pageq();
			uvm_page_unbusy(pgs, npages);
			uvm_unlock_pageq();
			*ap->a_count = 0;
			memcpy(pgs, opgs,
			    npages * sizeof(struct vm_page *));
			return (error);
		}
		nfs_del_committed_range(vp, origoffset, len);
		nfs_del_tobecommitted_range(vp, origoffset, len);
	}
	np->n_flag |= NMODIFIED;
	if (!locked) {
		simple_lock(&uobj->vmobjlock);
	}
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}
		pg->flags &= ~(PG_NEEDCOMMIT | PG_RDONLY);
	}
	if (!locked) {
		simple_unlock(&uobj->vmobjlock);
	}
	if (v3) {
		lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	}
	return (0);
}