/*	$NetBSD: nfs_bio.c,v 1.57 2000/12/13 18:15:56 jdolecek Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include "opt_nfs.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/dirent.h>
#include <sys/malloc.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred, cflag)
	struct vnode *vp;
	struct uio *uio;
	int ioflag, cflag;
	struct ucred *cred;
{
	struct nfsnode *np = VTONFS(vp);
	int biosize;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsdircache *ndp = NULL, *nndp = NULL;
	caddr_t baddr, ep, edp;
	int got_buf = 0, error = 0, n = 0, on = 0, en, enn;
	int enough = 0;
	struct dirent *dp, *pdp;
	off_t curoff = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (vp->v_type != VDIR && uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
#endif
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	biosize = nmp->nm_rsize;

	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */

	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldircache(vp, 0);
				np->n_direofoffset = 0;
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				if (vp->v_type == VDIR) {
					nfs_invaldircache(vp, 0);
					np->n_direofoffset = 0;
				}
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}

	/*
	 * update the cached read creds for this node.
	 */

	if (np->n_rcred) {
		crfree(np->n_rcred);
	}
	np->n_rcred = cred;
	crhold(cred);

	do {
#ifndef NFS_V2_ONLY
	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
		    do {
			error = nqnfs_getlease(vp, ND_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
			    nfs_invaldircache(vp, 0);
			    np->n_direofoffset = 0;
			}
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    nfs_invaldircache(vp, 0);
		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
		    np->n_direofoffset = 0;
		    if (error)
			return (error);
		}
	    }
#endif
	    /*
	     * Don't cache symlinks.
	     */
	    if (np->n_flag & NQNFSNONCACHE
		|| ((vp->v_flag & VROOT) && vp->v_type == VLNK)) {
		switch (vp->v_type) {
		case VREG:
			return (nfs_readrpc(vp, uio));
		case VLNK:
			return (nfs_readlinkrpc(vp, uio, cred));
		case VDIR:
			break;
		default:
			printf(" NQNFSNONCACHE: type %x unexpected\n",
			    vp->v_type);
		};
	    }
	    baddr = (caddr_t)0;
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;

		error = 0;
		while (uio->uio_resid > 0) {
			void *win;
			vsize_t bytelen = min(np->n_size - uio->uio_offset,
			    uio->uio_resid);

			if (bytelen == 0)
				break;
			win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
			    &bytelen, UBC_READ);
			error = uiomove(win, bytelen, uio);
			ubc_release(win, 0);
			if (error) {
				break;
			}
		}
		n = 0;
		break;

	    case VLNK:
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			error = nfs_doio(bp, p);
			if (error) {
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		got_buf = 1;
		on = 0;
		break;
	    case VDIR:
diragain:
		nfsstats.biocache_readdirs++;
		ndp = nfs_searchdircache(vp, uio->uio_offset,
		    (nmp->nm_flag & NFSMNT_XLATECOOKIE), 0);
		if (!ndp) {
			/*
			 * We've been handed a cookie that is not
			 * in the cache. If we're not translating
			 * 32 <-> 64, it may be a value that was
			 * flushed out of the cache because it grew
			 * too big. Let the server judge if it's
			 * valid or not. In the translation case,
			 * we have no way of validating this value,
			 * so punt.
			 */
			if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
				return (EINVAL);
			ndp = nfs_enterdircache(vp, uio->uio_offset,
			    uio->uio_offset, 0, 0);
		}

		if (uio->uio_offset != 0 &&
		    ndp->dc_cookie == np->n_direofoffset) {
			nfsstats.direofcache_hits++;
			return (0);
		}

		bp = nfs_getcacheblk(vp, ndp->dc_blkno, NFS_DIRBLKSIZ, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_dcookie = ndp->dc_blkcookie;
			error = nfs_doio(bp, p);
			if (error) {
				/*
				 * Yuck! The directory has been modified on the
				 * server. Punt and let the userland code
				 * deal with it.
				 */
				brelse(bp);
				if (error == NFSERR_BAD_COOKIE) {
					nfs_invaldircache(vp, 0);
					nfs_vinvalbuf(vp, 0, cred, p, 1);
					error = EINVAL;
				}
				return (error);
			}
		}

		/*
		 * Just return if we hit EOF right away with this
		 * block. Always check here, because direofoffset
		 * may have been set by an nfsiod since the last
		 * check.
		 */
		if (np->n_direofoffset != 0 &&
		    ndp->dc_blkcookie == np->n_direofoffset) {
			brelse(bp);
			return (0);
		}

		/*
		 * Find the entry we were looking for in the block.
		 */

		en = ndp->dc_entry;

		pdp = dp = (struct dirent *)bp->b_data;
		edp = bp->b_data + bp->b_bcount;
		enn = 0;
		while (enn < en && (caddr_t)dp < edp) {
			pdp = dp;
			dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
			enn++;
		}

		/*
		 * If the entry number was bigger than the number of
		 * entries in the block, or the cookie of the previous
		 * entry doesn't match, the directory cache is
		 * stale. Flush it and try again (i.e. go to
		 * the server).
		 */
		if ((caddr_t)dp >= edp || (caddr_t)dp + dp->d_reclen > edp ||
		    (en > 0 && NFS_GETCOOKIE(pdp) != ndp->dc_cookie)) {
#ifdef DEBUG
			printf("invalid cache: %p %p %p off %lx %lx\n",
			    pdp, dp, edp,
			    (unsigned long)uio->uio_offset,
			    (unsigned long)NFS_GETCOOKIE(pdp));
#endif
			brelse(bp);
			nfs_invaldircache(vp, 0);
			nfs_vinvalbuf(vp, 0, cred, p, 0);
			goto diragain;
		}

		on = (caddr_t)dp - bp->b_data;

		/*
		 * Cache all entries that may be exported to the
		 * user, as they may be thrown back at us. The
		 * NFSBIO_CACHECOOKIES flag indicates that all
		 * entries are being 'exported', so cache them all.
		 */

		if (en == 0 && pdp == dp) {
			dp = (struct dirent *)
			    ((caddr_t)dp + dp->d_reclen);
			enn++;
		}

		if (uio->uio_resid < (bp->b_bcount - on)) {
			n = uio->uio_resid;
			enough = 1;
		} else
			n = bp->b_bcount - on;

		ep = bp->b_data + on + n;

		/*
		 * Find last complete entry to copy, caching entries
		 * (if requested) as we go.
		 */

		while ((caddr_t)dp < ep && (caddr_t)dp + dp->d_reclen <= ep) {
			if (cflag & NFSBIO_CACHECOOKIES) {
				nndp = nfs_enterdircache(vp, NFS_GETCOOKIE(pdp),
				    ndp->dc_blkcookie, enn, bp->b_lblkno);
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
					NFS_STASHCOOKIE32(pdp,
					    nndp->dc_cookie32);
				}
			}
			pdp = dp;
			dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
			enn++;
		}

		/*
		 * If the last requested entry was not the last in the
		 * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),
		 * cache the cookie of the last requested one, and
		 * set the offset to it.
		 */

		if ((on + n) < bp->b_bcount) {
			curoff = NFS_GETCOOKIE(pdp);
			nndp = nfs_enterdircache(vp, curoff, ndp->dc_blkcookie,
			    enn, bp->b_lblkno);
			if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
				NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
				curoff = nndp->dc_cookie32;
			}
		} else
			curoff = bp->b_dcookie;

		/*
		 * Always cache the entry for the next block,
		 * so that readaheads can use it.
		 */
		nndp = nfs_enterdircache(vp, bp->b_dcookie, bp->b_dcookie, 0, 0);
		if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
			if (curoff == bp->b_dcookie) {
				NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
				curoff = nndp->dc_cookie32;
			}
		}

		n = ((caddr_t)pdp + pdp->d_reclen) - (bp->b_data + on);

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 * directory offset cookie of the next block.)
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    np->n_direofoffset == 0 && !(np->n_flag & NQNFSNONCACHE)) {
			rabp = nfs_getcacheblk(vp, nndp->dc_blkno,
			    NFS_DIRBLKSIZ, p);
			if (rabp) {
				if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					rabp->b_dcookie = nndp->dc_cookie;
					rabp->b_flags |= (B_READ | B_ASYNC);
					if (nfs_asyncio(rabp)) {
						rabp->b_flags |= B_INVAL;
						brelse(rabp);
					}
				} else
					brelse(rabp);
			}
		}
		got_buf = 1;
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		break;
	    }

	    if (n > 0) {
		if (!baddr)
			baddr = bp->b_data;
		error = uiomove(baddr + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		break;
	    case VLNK:
		n = 0;
		break;
	    case VDIR:
		if (np->n_flag & NQNFSNONCACHE)
			bp->b_flags |= B_INVAL;
		uio->uio_offset = curoff;
		if (enough)
			n = 0;
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n", vp->v_type);
	    }
	    if (got_buf)
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, iomode, must_commit;
	int rv;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
#endif
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cached write creds for this node.
	 */

	if (np->n_wcred) {
		crfree(np->n_wcred);
	}
	np->n_wcred = cred;
	crhold(cred);

	if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
		iomode = NFSV3WRITE_FILESYNC;
		error = nfs_writerpc(vp, uio, &iomode, &must_commit);
		if (must_commit)
			nfs_clearcommit(vp->v_mount);
		return (error);
	}

	do {
		void *win;
		voff_t oldoff = uio->uio_offset;
		vsize_t bytelen = uio->uio_resid;

#ifndef NFS_V2_ONLY
		/*
		 * Check for a valid write lease.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
#endif
		nfsstats.biocache_writes++;

		np->n_flag |= NMODIFIED;
		if (np->n_size < uio->uio_offset + bytelen) {
			np->n_size = uio->uio_offset + bytelen;
			uvm_vnp_setsize(vp, np->n_size);
		}
		win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
		    UBC_WRITE);
		error = uiomove(win, bytelen, uio);
		if (error) {
			memset((void *)trunc_page((vaddr_t)win), 0,
			    round_page((vaddr_t)win + bytelen) -
			    trunc_page((vaddr_t)win));
		}
		ubc_release(win, 0);
		rv = 1;
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			simple_lock(&vp->v_uvm.u_obj.vmobjlock);
			rv = vp->v_uvm.u_obj.pgops->pgo_flush(
			    &vp->v_uvm.u_obj,
			    oldoff & ~(nmp->nm_wsize - 1),
			    uio->uio_offset & ~(nmp->nm_wsize - 1),
			    PGO_CLEANIT|PGO_SYNCIO);
			simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
		} else if ((oldoff & ~(nmp->nm_wsize - 1)) !=
		    (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
			simple_lock(&vp->v_uvm.u_obj.vmobjlock);
			rv = vp->v_uvm.u_obj.pgops->pgo_flush(
			    &vp->v_uvm.u_obj,
			    oldoff & ~(nmp->nm_wsize - 1),
			    uio->uio_offset & ~(nmp->nm_wsize - 1),
			    PGO_CLEANIT|PGO_WEAK);
			simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
		}
		if (!rv) {
			error = EIO;
			break;
		}
	} while (uio->uio_resid > 0);
	return error;
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, p))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, NULL, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, NULL, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i;
	struct nfsmount *nmp;
	int gotiod, slpflag = 0, slptimeo = 0, error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */

	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			nfs_iodwant[i] = NULL;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}
	/*
	 * If none are free, we may already have an iod working on this mount
	 * point. If so, it will process our request.
	 */
	if (!gotiod && nmp->nm_bufqiods > 0)
		gotiod = TRUE;

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2 * nfs_numasync) {
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
			    "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, bp->b_proc))
					return (EINTR);
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */
			if (nmp->nm_bufqiods == 0)
				goto again;
		}
		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop);
			if (!error && uiop->uio_resid) {

				/*
				 * If len > 0, there is a hole in the file and
				 * no writes after the hole have been pushed to
				 * the server yet.
				 * Just zero fill the rest of the valid area.
				 */

				diff = bp->b_bcount - uiop->uio_resid;
				len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
				    + diff);
				if (len > 0) {
					len = min(len, uiop->uio_resid);
					memset((char *)bp->b_data + diff, 0, len);
				}
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			      NQNFS_CKINVALID(vp, np, ND_READ) &&
			      np->n_lrev != np->n_brev) ||
			     (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			      np->n_mtime != np->n_vattr->va_mtime.tv_sec))) {
				uprintf("Process killed due to "
				    "text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		case VDIR:
			nfsstats.readdir_bios++;
			uiop->uio_offset = bp->b_dcookie;
			if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
				error = nfs_readdirplusrpc(vp, uiop, curproc->p_ucred);
				if (error == NFSERR_NOTSUPP)
					nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
			}
			if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
				error = nfs_readdirrpc(vp, uiop, curproc->p_ucred);
			if (!error) {
				bp->b_dcookie = uiop->uio_offset;
			}
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		/*
		 * If B_NEEDCOMMIT is set, a commit rpc may do the trick.
		 * If not, an actual write will have to be scheduled.
		 */

		io.iov_base = bp->b_data;
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		uiop->uio_offset = (((off_t)bp->b_blkno) << DEV_BSHIFT);
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		iomode = NFSV3WRITE_UNSTABLE;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}

/*
 * Vnode op for VM getpages.
 */
int
nfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		vm_page_t *a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	off_t eof, offset, origoffset, startoffset, endoffset;
	int s, i, error, npages, orignpages, npgs, ridx, pidx, pcount;
	vaddr_t kva;
	struct buf *bp, *mbp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
	int flags = ap->a_flags;
	int bsize;
	struct vm_page *pgs[16];		/* XXXUBC 16 */
	boolean_t v3 = NFS_ISV3(vp);
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;

	UVMHIST_FUNC("nfs_getpages"); UVMHIST_CALLED(ubchist);
	UVMHIST_LOG(ubchist, "vp %p off 0x%x count %d", vp, (int)ap->a_offset,
	    *ap->a_count, 0);

#ifdef DIAGNOSTIC
	if (ap->a_centeridx < 0 || ap->a_centeridx >= *ap->a_count) {
		panic("nfs_getpages: centeridx %d out of range",
		    ap->a_centeridx);
	}
#endif

	error = 0;
	origoffset = ap->a_offset;
	eof = vp->v_uvm.u_size;
	if (origoffset >= eof) {
		if ((flags & PGO_LOCKED) == 0) {
			simple_unlock(&uobj->vmobjlock);
		}
		UVMHIST_LOG(ubchist, "off 0x%x past EOF 0x%x",
		    (int)origoffset, (int)eof, 0, 0);
		return EINVAL;
	}

	if (flags & PGO_LOCKED) {
		uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
		    UFP_NOWAIT|UFP_NOALLOC);
		return 0;
	}

	/* vnode is VOP_LOCKed, uobj is locked */

	bsize = nmp->nm_rsize;
	orignpages = min(*ap->a_count,
	    round_page(eof - origoffset) >> PAGE_SHIFT);
	npages = orignpages;
	startoffset = origoffset & ~(bsize - 1);
	endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
	    + bsize - 1) & ~(bsize - 1));
	endoffset = min(endoffset, round_page(eof));
	ridx = (origoffset - startoffset) >> PAGE_SHIFT;

	if (!async && !write) {
		int rapages = max(PAGE_SIZE, nmp->nm_rsize) >> PAGE_SHIFT;

		(void) VOP_GETPAGES(vp, endoffset, NULL, &rapages, 0,
		    VM_PROT_READ, 0, 0);
		simple_lock(&uobj->vmobjlock);
	}

	UVMHIST_LOG(ubchist, "npages %d offset 0x%x", npages,
	    (int)origoffset, 0, 0);
	memset(pgs, 0, sizeof(pgs));
	uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);

	if (flags & PGO_OVERWRITE) {
		UVMHIST_LOG(ubchist, "PGO_OVERWRITE", 0, 0, 0, 0);

		/* XXXUBC for now, zero the page if we allocated it */
		for (i = 0; i < npages; i++) {
			struct vm_page *pg = pgs[ridx + i];

			if (pg->flags & PG_FAKE) {
				uvm_pagezero(pg);
				pg->flags &= ~(PG_FAKE);
			}
		}
		goto out;
	}

	/*
	 * if the pages are already resident, just return them.
	 */

	for (i = 0; i < npages; i++) {
		struct vm_page *pg = pgs[ridx + i];

		if ((pg->flags & PG_FAKE) != 0 ||
		    ((ap->a_access_type & VM_PROT_WRITE) &&
		     (pg->flags & PG_RDONLY))) {
			break;
		}
	}
	if (i == npages) {
		UVMHIST_LOG(ubchist, "returning cached pages", 0, 0, 0, 0);
		goto out;
	}

	/*
	 * the page wasn't resident and we're not overwriting,
	 * so we're going to have to do some i/o.
	 * find any additional pages needed to cover the expanded range.
	 */

	if (startoffset != origoffset ||
	    startoffset + (npages << PAGE_SHIFT) != endoffset) {
		UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
		    (int)startoffset, (int)endoffset, 0, 0);
		npages = (endoffset - startoffset) >> PAGE_SHIFT;
		KASSERT(npages != 0);
		npgs = npages;
		uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
	}
	simple_unlock(&uobj->vmobjlock);

	/*
	 * update the cached read creds for this node.
	 */

	if (np->n_rcred) {
		crfree(np->n_rcred);
	}
	np->n_rcred = curproc->p_ucred;
	crhold(np->n_rcred);

	/*
	 * read the desired page(s).
	 */

	totalbytes = npages << PAGE_SHIFT;
	bytes = min(totalbytes, vp->v_uvm.u_size - startoffset);
	tailbytes = totalbytes - bytes;
	skipbytes = 0;

	kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
	    UVMPAGER_MAPIN_READ);

	s = splbio();
	mbp = pool_get(&bufpool, PR_WAITOK);
	splx(s);
	mbp->b_bufsize = totalbytes;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_READ | (async ? B_CALL|B_ASYNC : 0);
	mbp->b_iodone = uvm_aio_biodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	/*
	 * if EOF is in the middle of the last page, zero the part past EOF.
	 */

	if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
		memset((char *)kva + bytes, 0, tailbytes);
	}

	/*
	 * now loop over the pages, reading as needed.
	 */

	bp = NULL;
	for (offset = startoffset;
	     bytes > 0;
	     offset += iobytes, bytes -= iobytes) {

		/*
		 * skip pages which don't need to be read.
		 */

		pidx = (offset - startoffset) >> PAGE_SHIFT;
		UVMHIST_LOG(ubchist, "pidx %d offset 0x%x startoffset 0x%x",
		    pidx, (int)offset, (int)startoffset, 0);
		while ((pgs[pidx]->flags & PG_FAKE) == 0) {
			size_t b;

#ifdef DEBUG
			if (offset & (PAGE_SIZE - 1)) {
				panic("nfs_getpages: skipping from middle "
				    "of page");
			}
#endif

			b = min(PAGE_SIZE, bytes);
			offset += b;
			bytes -= b;
			skipbytes += b;
			pidx++;
			UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
			    (int)offset, 0, 0, 0);
			if (bytes == 0) {
				goto loopdone;
			}
		}

		/*
		 * see how many pages can be read with this i/o.
		 * reduce the i/o size if necessary.
		 */

		iobytes = bytes;
		if (offset + iobytes > round_page(offset)) {
			pcount = 1;
			while (pidx + pcount < npages &&
			    pgs[pidx + pcount]->flags & PG_FAKE) {
				pcount++;
			}
			iobytes = min(iobytes, (pcount << PAGE_SHIFT) -
			    (offset - trunc_page(offset)));
		}
		iobytes = min(iobytes, nmp->nm_rsize);

		/*
		 * allocate a sub-buf for this piece of the i/o
		 * (or just use mbp if there's only 1 piece),
		 * and start it going.
		 */

		if (offset == startoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			bp = pool_get(&bufpool, PR_WAITOK);
			splx(s);
			bp->b_data = (char *)kva + offset - startoffset;
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_private = mbp;
		bp->b_lblkno = bp->b_blkno = offset >> DEV_BSHIFT;

		UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
		    bp, offset, iobytes, bp->b_blkno);

		VOP_STRATEGY(bp);
	}

loopdone:
	if (skipbytes) {
		s = splbio();
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	if (async) {
		UVMHIST_LOG(ubchist, "returning PEND", 0, 0, 0, 0);
		return EINPROGRESS;
	}
	if (bp != NULL) {
		error = biowait(mbp);
	}
	s = splbio();
	pool_put(&bufpool, mbp);
	splx(s);
	uvm_pagermapout(kva, npages);

	if (write && v3) {
		lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
		nfs_del_committed_range(vp, origoffset, npages);
		nfs_del_tobecommitted_range(vp, origoffset, npages);
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
		}
		lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	}

	simple_lock(&uobj->vmobjlock);

out:
	uvm_lock_pageq();
	if (error) {
		for (i = 0; i < npages; i++) {
			if (pgs[i] == NULL) {
				continue;
			}
			UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
			    pgs[i], pgs[i]->flags, 0, 0);
			if ((pgs[i]->flags & PG_FAKE) == 0) {
				continue;
			}
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			uvm_pagefree(pgs[i]);
		}
		goto done;
	}

	UVMHIST_LOG(ubchist, "ridx %d count %d", ridx, npages, 0, 0);
	for (i = 0; i < npages; i++) {
		if (pgs[i] == NULL) {
			continue;
		}
		UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
		    pgs[i], pgs[i]->flags, 0, 0);
		if (pgs[i]->flags & PG_FAKE) {
			UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
			    pgs[i], (int)pgs[i]->offset, 0, 0);
			pgs[i]->flags &= ~(PG_FAKE);
			pmap_clear_modify(pgs[i]);
			pmap_clear_reference(pgs[i]);
		}
		if (i < ridx || i >= ridx + orignpages || async) {
			UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
			    pgs[i], (int)pgs[i]->offset, 0, 0);
			KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
			if (pgs[i]->flags & PG_WANTED) {
				wakeup(pgs[i]);
			}
			if (pgs[i]->wire_count == 0) {
				uvm_pageactivate(pgs[i]);
			}
			pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
			UVM_PAGE_OWN(pgs[i], NULL);
		}
	}

done:
	uvm_unlock_pageq();
	simple_unlock(&uobj->vmobjlock);
	if (ap->a_m != NULL) {
		memcpy(ap->a_m, &pgs[ridx],
		    *ap->a_count * sizeof(struct vm_page *));
	}

	UVMHIST_LOG(ubchist, "done -> %d", error, 0, 0, 0);
	return error;
}

/*
 * Vnode op for VM putpages.
 */
int
nfs_putpages(v)
	void *v;
{
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		struct vm_page **a_m;
		int a_count;
		int a_flags;
		int *a_rtvals;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct buf *bp, *mbp;
	struct vm_page **pgs = ap->a_m;
	int flags = ap->a_flags;
	int npages = ap->a_count;
	int s, error = 0, i;
	size_t bytes, iobytes, skipbytes;
	vaddr_t kva;
	off_t offset, origoffset, commitoff;
	uint32_t commitbytes;
	boolean_t v3 = NFS_ISV3(vp);
	boolean_t async = (flags & PGO_SYNCIO) == 0;
	boolean_t weak = (flags & PGO_WEAK) && v3;
	UVMHIST_FUNC("nfs_putpages"); UVMHIST_CALLED(ubchist);

	UVMHIST_LOG(ubchist, "vp %p pgp %p count %d",
	    vp, ap->a_m, ap->a_count, 0);

	simple_unlock(&vp->v_uvm.u_obj.vmobjlock);

	origoffset = pgs[0]->offset;
	bytes = min(ap->a_count << PAGE_SHIFT, vp->v_uvm.u_size - origoffset);
	skipbytes = 0;

	/*
	 * if the range has been committed already, just mark the pages thus.
	 * if the range merely needs to be committed and this is a weak
	 * putpage, we're done; otherwise commit the range.
	 */

	if (v3) {
		lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
		if (nfs_in_committed_range(vp, origoffset, bytes)) {
			goto committed;
		}
		if (nfs_in_tobecommitted_range(vp, origoffset, bytes)) {
			if (weak) {
				lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
				return 0;
			} else {
				commitoff = np->n_pushlo;
				commitbytes = (uint32_t)(np->n_pushhi -
				    np->n_pushlo);
				goto commit;
			}
		}
		lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	}

	/*
	 * otherwise write or commit all the pages.
	 */

	kva = uvm_pagermapin(pgs, ap->a_count, UVMPAGER_MAPIN_WAITOK|
	    UVMPAGER_MAPIN_WRITE);

	s = splbio();
	vp->v_numoutput += 2;
	mbp = pool_get(&bufpool, PR_WAITOK);
	UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
	    vp, mbp, vp->v_numoutput, bytes);
	splx(s);
	mbp->b_bufsize = npages << PAGE_SHIFT;
	mbp->b_data = (void *)kva;
	mbp->b_resid = mbp->b_bcount = bytes;
	mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
	    (async ? B_CALL|B_ASYNC : 0) |
	    (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
	mbp->b_iodone = uvm_aio_aiodone;
	mbp->b_vp = vp;
	LIST_INIT(&mbp->b_dep);

	for (offset = origoffset;
	     bytes > 0;
	     offset += iobytes, bytes -= iobytes) {
		iobytes = min(nmp->nm_wsize, bytes);

		/*
		 * skip writing any pages which only need a commit.
		 */

		if ((pgs[(offset - origoffset) >> PAGE_SHIFT]->flags &
		    PG_NEEDCOMMIT) != 0) {
			iobytes = PAGE_SIZE;
			skipbytes += min(iobytes, vp->v_uvm.u_size - offset);
			continue;
		}

		/* if it's really one i/o, don't make a second buf */
		if (offset == origoffset && iobytes == bytes) {
			bp = mbp;
		} else {
			s = splbio();
			vp->v_numoutput++;
			bp = pool_get(&bufpool, PR_WAITOK);
			UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
			    vp, bp, vp->v_numoutput, 0);
			splx(s);
			bp->b_data = (char *)kva + (offset - origoffset);
			bp->b_resid = bp->b_bcount = iobytes;
			bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
			bp->b_iodone = uvm_aio_biodone1;
			bp->b_vp = vp;
			LIST_INIT(&bp->b_dep);
		}
		bp->b_private = mbp;
		bp->b_lblkno = bp->b_blkno = (daddr_t)(offset >> DEV_BSHIFT);
		UVMHIST_LOG(ubchist, "bp %p numout %d",
		    bp, vp->v_numoutput, 0, 0);
		VOP_STRATEGY(bp);
	}
	if (skipbytes) {
		UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0, 0, 0);
		s = splbio();
		mbp->b_resid -= skipbytes;
		if (mbp->b_resid == 0) {
			biodone(mbp);
		}
		splx(s);
	}
	if (async) {
		return EINPROGRESS;
	}
	error = biowait(mbp);

	s = splbio();
	vwakeup(mbp);
	pool_put(&bufpool, mbp);
	splx(s);

	uvm_pagermapout(kva, ap->a_count);
	if (error || !v3) {
		UVMHIST_LOG(ubchist, "returning error %d", error, 0, 0, 0);
		return error;
	}

	/*
	 * for a weak put, mark the range as "to be committed"
	 * and mark the pages read-only so that we will be notified
	 * to remove the pages from the "to be committed" range
	 * if they are made dirty again.
	 * for a strong put, commit the pages and remove them from the
	 * "to be committed" range.  also, mark them as writable
	 * and not cleanable with just a commit.
	 */

	lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
	if (weak) {
		nfs_add_tobecommitted_range(vp, origoffset,
		    npages << PAGE_SHIFT);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags |= PG_NEEDCOMMIT|PG_RDONLY;
		}
	} else {
		commitoff = origoffset;
		commitbytes = npages << PAGE_SHIFT;
commit:
		error = nfs_commit(vp, commitoff, commitbytes, curproc);
		nfs_del_tobecommitted_range(vp, commitoff, commitbytes);
committed:
		for (i = 0; i < npages; i++) {
			pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
		}
	}
	lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	return error;
}