/*	$NetBSD: nfs_bio.c,v 1.73 2001/12/31 07:16:47 chs Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: nfs_bio.c,v 1.73 2001/12/31 07:16:47 chs Exp $");

#include "opt_nfs.h"
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/dirent.h>
#include <sys/malloc.h>

#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred, cflag)
	struct vnode *vp;
	struct uio *uio;
	int ioflag, cflag;
	struct ucred *cred;
{
	struct nfsnode *np = VTONFS(vp);
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsdircache *ndp = NULL, *nndp = NULL;
	caddr_t baddr, ep, edp;
	int got_buf = 0, error = 0, n = 0, on = 0, en, enn;
	int enough = 0;
	struct dirent *dp, *pdp;
	off_t curoff = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (vp->v_type != VDIR && uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
#endif
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);

	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need
	 * current attributes, this can be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
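
	/*
	 * Editorial illustration (not part of the original source):
	 * assuming another client writes the file between two of our
	 * reads, the check below plays out roughly as follows.  n_mtime
	 * caches the server mtime observed at the last read RPC, and
	 * nfs_vinvalbuf() is the flush used by the real code.
	 */
#if 0
	/* t0: we read; np->n_mtime = 1000 (server mtime in seconds) */
	/* t1: another client writes; server mtime becomes 1001 */
	/* t2: we read again: */
	error = VOP_GETATTR(vp, &vattr, cred, p); /* va_mtime.tv_sec == 1001 */
	if (np->n_mtime != vattr.va_mtime.tv_sec) {
		/* cached pages may be stale: flush before reading */
		error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
		np->n_mtime = vattr.va_mtime.tv_sec;
	}
#endif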

	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldircache(vp, 0);
				np->n_direofoffset = 0;
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				if (vp->v_type == VDIR) {
					nfs_invaldircache(vp, 0);
					np->n_direofoffset = 0;
				}
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}

	/*
	 * update the cached read creds for this node.
	 */

	if (np->n_rcred) {
		crfree(np->n_rcred);
	}
	np->n_rcred = cred;
	crhold(cred);

	do {
#ifndef NFS_V2_ONLY
		/*
		 * Get a valid lease. If cached data is stale, flush it.
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, ND_READ)) {
				do {
					error = nqnfs_getlease(vp, ND_READ,
					    cred, p);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) &&
				     vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						nfs_invaldircache(vp, 0);
						np->n_direofoffset = 0;
					}
					error = nfs_vinvalbuf(vp, V_SAVE,
					    cred, p, 1);
					if (error)
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR &&
			    (np->n_flag & NMODIFIED)) {
				nfs_invaldircache(vp, 0);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				np->n_direofoffset = 0;
				if (error)
					return (error);
			}
		}
#endif
		/*
		 * Don't cache symlinks.
		 */
		if (np->n_flag & NQNFSNONCACHE
		    || ((vp->v_flag & VROOT) && vp->v_type == VLNK)) {
			switch (vp->v_type) {
			case VREG:
				return (nfs_readrpc(vp, uio));
			case VLNK:
				return (nfs_readlinkrpc(vp, uio, cred));
			case VDIR:
				break;
			default:
				printf(" NQNFSNONCACHE: type %x unexpected\n",
				    vp->v_type);
			}
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;

			error = 0;
			if (uio->uio_offset >= np->n_size) {
				break;
			}
			while (uio->uio_resid > 0) {
				void *win;
				vsize_t bytelen = MIN(np->n_size -
				    uio->uio_offset, uio->uio_resid);

				if (bytelen == 0)
					break;
				win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
				    &bytelen, UBC_READ);
				error = uiomove(win, bytelen, uio);
				ubc_release(win, 0);
				if (error) {
					break;
				}
			}
			n = 0;
			break;

		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN,
			    p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
diragain:
			nfsstats.biocache_readdirs++;
			ndp = nfs_searchdircache(vp, uio->uio_offset,
			    (nmp->nm_flag & NFSMNT_XLATECOOKIE), 0);
			if (!ndp) {
				/*
				 * We've been handed a cookie that is not
				 * in the cache. If we're not translating
				 * 32 <-> 64, it may be a value that was
				 * flushed out of the cache because it grew
				 * too big. Let the server judge if it's
				 * valid or not. In the translation case,
				 * we have no way of validating this value,
				 * so punt.
				 */
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
					return (EINVAL);
				ndp = nfs_enterdircache(vp, uio->uio_offset,
				    uio->uio_offset, 0, 0);
			}

			if (uio->uio_offset != 0 &&
			    ndp->dc_cookie == np->n_direofoffset) {
				nfsstats.direofcache_hits++;
				return (0);
			}

			bp = nfs_getcacheblk(vp, ndp->dc_blkno,
			    NFS_DIRBLKSIZ, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				bp->b_dcookie = ndp->dc_blkcookie;
				error = nfs_doio(bp, p);
				if (error) {
					/*
					 * Yuck! The directory has been
					 * modified on the server. Punt
					 * and let the userland code deal
					 * with it.
					 */
					brelse(bp);
					if (error == NFSERR_BAD_COOKIE) {
						nfs_invaldircache(vp, 0);
						nfs_vinvalbuf(vp, 0, cred,
						    p, 1);
						error = EINVAL;
					}
					return (error);
				}
			}

			/*
			 * Just return if we hit EOF right away with this
			 * block. Always check here, because direofoffset
			 * may have been set by an nfsiod since the last
			 * check.
			 */
			if (np->n_direofoffset != 0 &&
			    ndp->dc_blkcookie == np->n_direofoffset) {
				brelse(bp);
				return (0);
			}

			/*
			 * Find the entry we were looking for in the block.
			 */

			en = ndp->dc_entry;

			pdp = dp = (struct dirent *)bp->b_data;
			edp = bp->b_data + bp->b_bcount - bp->b_resid;
			enn = 0;
			while (enn < en && (caddr_t)dp < edp) {
				pdp = dp;
				dp = (struct dirent *)
				    ((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			/*
			 * If the entry number was bigger than the number of
			 * entries in the block, or the cookie of the previous
			 * entry doesn't match, the directory cache is
			 * stale. Flush it and try again (i.e. go to
			 * the server).
			 */
			if ((caddr_t)dp >= edp ||
			    (caddr_t)dp + dp->d_reclen > edp ||
			    (en > 0 && NFS_GETCOOKIE(pdp) != ndp->dc_cookie)) {
#ifdef DEBUG
				printf("invalid cache: %p %p %p off %lx %lx\n",
				    pdp, dp, edp,
				    (unsigned long)uio->uio_offset,
				    (unsigned long)NFS_GETCOOKIE(pdp));
#endif
				brelse(bp);
				nfs_invaldircache(vp, 0);
				nfs_vinvalbuf(vp, 0, cred, p, 0);
				goto diragain;
			}

			on = (caddr_t)dp - bp->b_data;

			/*
			 * Cache all entries that may be exported to the
			 * user, as they may be thrown back at us. The
			 * NFSBIO_CACHECOOKIES flag indicates that all
			 * entries are being 'exported', so cache them all.
			 */

			if (en == 0 && pdp == dp) {
				dp = (struct dirent *)
				    ((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			if (uio->uio_resid <
			    (bp->b_bcount - bp->b_resid - on)) {
				n = uio->uio_resid;
				enough = 1;
			} else
				n = bp->b_bcount - bp->b_resid - on;

			ep = bp->b_data + on + n;
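
			/*
			 * Editorial illustration (not in the original
			 * source): dirents are chained by d_reclen, and
			 * NFS_GETCOOKIE(dp) yields the opaque server
			 * cookie stashed with the entry, i.e. where a
			 * readdir RPC would resume after "dp".  A walk
			 * over the valid part of a block looks like:
			 */
#if 0
			struct dirent *xdp;

			for (xdp = (struct dirent *)bp->b_data;
			    (caddr_t)xdp < edp &&
			    (caddr_t)xdp + xdp->d_reclen <= edp;
			    xdp = (struct dirent *)
				((caddr_t)xdp + xdp->d_reclen)) {
				off_t resume = NFS_GETCOOKIE(xdp);
				/* ... */
			}
#endif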

			/*
			 * Find last complete entry to copy, caching entries
			 * (if requested) as we go.
			 */

			while ((caddr_t)dp < ep &&
			    (caddr_t)dp + dp->d_reclen <= ep) {
				if (cflag & NFSBIO_CACHECOOKIES) {
					nndp = nfs_enterdircache(vp,
					    NFS_GETCOOKIE(pdp),
					    ndp->dc_blkcookie, enn,
					    bp->b_lblkno);
					if (nmp->nm_flag &
					    NFSMNT_XLATECOOKIE) {
						NFS_STASHCOOKIE32(pdp,
						    nndp->dc_cookie32);
					}
				}
				pdp = dp;
				dp = (struct dirent *)
				    ((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			/*
			 * If the last requested entry was not the last in the
			 * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),
			 * cache the cookie of the last requested one, and
			 * set the offset to it.
			 */

			if ((on + n) < bp->b_bcount - bp->b_resid) {
				curoff = NFS_GETCOOKIE(pdp);
				nndp = nfs_enterdircache(vp, curoff,
				    ndp->dc_blkcookie, enn, bp->b_lblkno);
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
					NFS_STASHCOOKIE32(pdp,
					    nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
			} else
				curoff = bp->b_dcookie;

			/*
			 * Always cache the entry for the next block,
			 * so that readaheads can use it.
			 */
			nndp = nfs_enterdircache(vp, bp->b_dcookie,
			    bp->b_dcookie, 0, 0);
			if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
				if (curoff == bp->b_dcookie) {
					NFS_STASHCOOKIE32(pdp,
					    nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
			}

			n = ((caddr_t)pdp + pdp->d_reclen) -
			    (bp->b_data + on);

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you
			 * have the directory offset cookie of the next
			 * block.)
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    np->n_direofoffset == 0 &&
			    !(np->n_flag & NQNFSNONCACHE)) {
				rabp = nfs_getcacheblk(vp, nndp->dc_blkno,
				    NFS_DIRBLKSIZ, p);
				if (rabp) {
					if ((rabp->b_flags &
					    (B_DONE | B_DELWRI)) == 0) {
						rabp->b_dcookie =
						    nndp->dc_cookie;
						rabp->b_flags |=
						    (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp)) {
							rabp->b_flags |=
							    B_INVAL;
							brelse(rabp);
						}
					} else
						brelse(rabp);
				}
			}
			got_buf = 1;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n",
			    vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			if (np->n_flag & NQNFSNONCACHE)
				bp->b_flags |= B_INVAL;
			uio->uio_offset = curoff;
			if (enough)
				n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n",
			    vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	void *win;
	voff_t oldoff, origoff;
	vsize_t bytelen;
	int error = 0, iomode, must_commit;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
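
	/*
	 * Editorial note: NWRITEERR latches a deferred write error in the
	 * nfsnode (set elsewhere in the NFS code when an asynchronous
	 * write fails), so the error from a previous write is reported
	 * to the next writer here.
	 */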
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
#ifndef NFS_V2_ONLY
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
#endif
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cached write creds for this node.
	 */

	if (np->n_wcred) {
		crfree(np->n_wcred);
	}
	np->n_wcred = cred;
	crhold(cred);

	if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
		iomode = NFSV3WRITE_FILESYNC;
		error = nfs_writerpc(vp, uio, &iomode, &must_commit);
		if (must_commit)
			nfs_clearcommit(vp->v_mount);
		return (error);
	}

	origoff = uio->uio_offset;
	do {
		oldoff = uio->uio_offset;
		bytelen = uio->uio_resid;

#ifndef NFS_V2_ONLY
		/*
		 * Check for a valid write lease.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
#endif
		nfsstats.biocache_writes++;

		np->n_flag |= NMODIFIED;
		if (np->n_size < uio->uio_offset + bytelen) {
			np->n_size = uio->uio_offset + bytelen;
		}
		if ((uio->uio_offset & PAGE_MASK) == 0 &&
		    ((uio->uio_offset + bytelen) & PAGE_MASK) == 0) {
			win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
			    UBC_WRITE | UBC_FAULTBUSY);
		} else {
			win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
			    UBC_WRITE);
		}
		error = uiomove(win, bytelen, uio);
		ubc_release(win, 0);
		if (error) {
			break;
		}

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 */

		if (vp->v_size < uio->uio_offset) {
			uvm_vnp_setsize(vp, uio->uio_offset);
		}

		if ((oldoff & ~(nmp->nm_wsize - 1)) !=
		    (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
			simple_lock(&vp->v_interlock);
			error = VOP_PUTPAGES(vp,
			    trunc_page(oldoff & ~(nmp->nm_wsize - 1)),
			    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
			    ~(nmp->nm_wsize - 1)),
			    PGO_CLEANIT | PGO_WEAK);
		}
	} while (uio->uio_resid > 0);
	if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
		simple_lock(&vp->v_interlock);
		error = VOP_PUTPAGES(vp,
		    trunc_page(origoff & ~(nmp->nm_wsize - 1)),
		    round_page((uio->uio_offset + nmp->nm_wsize - 1) &
		    ~(nmp->nm_wsize - 1)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	return error;
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, p))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, NULL, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, NULL, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
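
/*
 * Editorial sketch (assumptions flagged): the handoff below is a simple
 * rendezvous.  Idle iods park in nfs_iodwant[]; we claim one by clearing
 * its slot, pointing nfs_iodmount[] at this mount and waking it.  The
 * consumer side is assumed to live in the nfsiod loop (nfssvc_iod()),
 * draining nm_bufq roughly as follows:
 */
#if 0
	/* producer (this function) */
	nfs_iodwant[i] = NULL;				/* claim idle iod */
	nfs_iodmount[i] = nmp;				/* assign the mount */
	wakeup((caddr_t)&nfs_iodwant[i]);
	TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
	nmp->nm_bufqlen++;

	/* consumer (the iod, elsewhere) */
	while ((bp = TAILQ_FIRST(&nmp->nm_bufq)) != NULL) {
		TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen--;
		(void)nfs_doio(bp, p);
	}
#endif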

int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i;
	struct nfsmount *nmp;
	int gotiod, slpflag = 0, slptimeo = 0, error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */

	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			nfs_iodwant[i] = NULL;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}

	/*
	 * If none are free, we may already have an iod working on this mount
	 * point. If so, it will process our request.
	 */
	if (!gotiod && nmp->nm_bufqiods > 0)
		gotiod = TRUE;

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2 * nfs_numasync) {
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
			    "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, bp->b_proc))
					return (EINTR);
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */
			if (nmp->nm_bufqiods == 0)
				goto again;
		}
		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop);
			if (!error && uiop->uio_resid) {

				/*
				 * If len > 0, there is a hole in the file and
				 * no writes after the hole have been pushed to
				 * the server yet.
				 * Just zero fill the rest of the valid area.
				 */

				diff = bp->b_bcount - uiop->uio_resid;
				len = np->n_size - ((((off_t)bp->b_blkno) <<
				    DEV_BSHIFT) + diff);
				if (len > 0) {
					len = MIN(len, uiop->uio_resid);
					memset((char *)bp->b_data + diff, 0,
					    len);
				}
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			      NQNFS_CKINVALID(vp, np, ND_READ) &&
			      np->n_lrev != np->n_brev) ||
			     (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			      np->n_mtime != np->n_vattr->va_mtime.tv_sec))) {
				uprintf("Process killed due to "
				    "text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		case VDIR:
			nfsstats.readdir_bios++;
			uiop->uio_offset = bp->b_dcookie;
			if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
				error = nfs_readdirplusrpc(vp, uiop,
				    curproc->p_ucred);
				if (error == NFSERR_NOTSUPP)
					nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
			}
			if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
				error = nfs_readdirrpc(vp, uiop,
				    curproc->p_ucred);
			if (!error) {
				bp->b_dcookie = uiop->uio_offset;
			}
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		/*
		 * If B_NEEDCOMMIT is set, a commit rpc may do the trick.
		 * If not, an actual write will have to be scheduled.
		 */

		io.iov_base = bp->b_data;
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		uiop->uio_offset = (((off_t)bp->b_blkno) << DEV_BSHIFT);
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		iomode = NFSV3WRITE_UNSTABLE;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}

/*
 * Vnode op for VM getpages.
 */
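
/*
 * Editorial note: on an NFSv3 write fault, the pages being made
 * writable can no longer be assumed committed on the server, so after
 * genfs_getpages() succeeds the code below drops them from both
 * commit-tracking ranges (under n_commitlock) and clears
 * PG_NEEDCOMMIT/PG_RDONLY so the pages can be dirtied again.
 */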

int
nfs_getpages(v)
	void *v;
{
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		voff_t a_offset;
		struct vm_page **a_m;
		int *a_count;
		int a_centeridx;
		vm_prot_t a_access_type;
		int a_advice;
		int a_flags;
	} */ *ap = v;

	struct vnode *vp = ap->a_vp;
	struct uvm_object *uobj = &vp->v_uobj;
	struct nfsnode *np = VTONFS(vp);
	struct vm_page *pg, **pgs;
	off_t origoffset;
	int i, error, npages;
	boolean_t v3 = NFS_ISV3(vp);
	boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
	boolean_t locked = (ap->a_flags & PGO_LOCKED) != 0;
	UVMHIST_FUNC("nfs_getpages"); UVMHIST_CALLED(ubchist);

	/*
	 * update the cached read creds for this node.
	 */

	if (np->n_rcred) {
		crfree(np->n_rcred);
	}
	np->n_rcred = curproc->p_ucred;
	crhold(np->n_rcred);

	/*
	 * call the genfs code to get the pages.
	 */

	npages = *ap->a_count;
	error = genfs_getpages(v);
	if (error || !write || !v3) {
		return error;
	}

	/*
	 * this is a write fault, update the commit info.
	 */

	origoffset = ap->a_offset;
	pgs = ap->a_m;

	lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
	/* the commit ranges are byte ranges, not page counts */
	nfs_del_committed_range(vp, origoffset, npages << PAGE_SHIFT);
	nfs_del_tobecommitted_range(vp, origoffset, npages << PAGE_SHIFT);
	if (!locked) {
		simple_lock(&uobj->vmobjlock);
	}
	for (i = 0; i < npages; i++) {
		pg = pgs[i];
		if (pg == NULL || pg == PGO_DONTCARE) {
			continue;
		}
		pg->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
	}
	if (!locked) {
		simple_unlock(&uobj->vmobjlock);
	}
	lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	return 0;
}

int
nfs_gop_write(struct vnode *vp, struct vm_page **pgs, int npages, int flags)
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct nfsnode *np = VTONFS(vp);
	off_t origoffset, commitoff;
	uint32_t commitbytes;
	int error, i;
	int bytes;
	boolean_t v3 = NFS_ISV3(vp);
	boolean_t weak = flags & PGO_WEAK;
	UVMHIST_FUNC("nfs_gop_write"); UVMHIST_CALLED(ubchist);

	/* XXX for now, skip the v3 stuff. */
	v3 = FALSE;

	/*
	 * for NFSv2, just write normally.
	 */

	if (!v3) {
		return genfs_gop_write(vp, pgs, npages, flags);
	}

	/*
	 * for NFSv3, use delayed writes and the "commit" operation
	 * to avoid sync writes.
	 */
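
	/*
	 * Editorial sketch of the (currently disabled) v3 path below:
	 * pages pushed through genfs_gop_write() reach nfs_doio(), which
	 * writes them with NFSV3WRITE_UNSTABLE; durability then comes
	 * from a later COMMIT RPC.  A PGO_WEAK push merely records the
	 * range for a future commit:
	 */
#if 0
	error = genfs_gop_write(vp, pgs, npages, flags); /* UNSTABLE write */
	if (flags & PGO_WEAK)
		nfs_add_tobecommitted_range(vp, off, len);  /* defer */
	else
		error = nfs_commit(vp, off, len, curproc);  /* COMMIT now */
#endif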

	origoffset = pgs[0]->offset;
	bytes = npages << PAGE_SHIFT;
	lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
	if (nfs_in_committed_range(vp, origoffset, bytes)) {
		goto committed;
	}
	if (nfs_in_tobecommitted_range(vp, origoffset, bytes)) {
		if (weak) {
			lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
			return 0;
		} else {
			commitoff = np->n_pushlo;
			commitbytes = (uint32_t)(np->n_pushhi - np->n_pushlo);
			goto commit;
		}
	} else {
		commitoff = origoffset;
		commitbytes = npages << PAGE_SHIFT;
	}
	simple_lock(&uobj->vmobjlock);
	for (i = 0; i < npages; i++) {
		pgs[i]->flags |= PG_NEEDCOMMIT|PG_RDONLY;
		pgs[i]->flags &= ~PG_CLEAN;
	}
	simple_unlock(&uobj->vmobjlock);
	lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	error = genfs_gop_write(vp, pgs, npages, flags);
	if (error) {
		return error;
	}
	lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL);
	if (weak) {
		nfs_add_tobecommitted_range(vp, origoffset,
		    npages << PAGE_SHIFT);
	} else {
commit:
		error = nfs_commit(vp, commitoff, commitbytes, curproc);
		nfs_del_tobecommitted_range(vp, commitoff, commitbytes);
committed:
		simple_lock(&uobj->vmobjlock);
		for (i = 0; i < npages; i++) {
			pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
		}
		simple_unlock(&uobj->vmobjlock);
	}
	lockmgr(&np->n_commitlock, LK_RELEASE, NULL);
	return error;
}
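
/*
 * Editorial usage sketch (assumption-laden, not part of the original
 * file): weak pushes accumulate a single to-be-committed byte range
 * (np->n_pushlo .. np->n_pushhi); a later strong push sends one COMMIT
 * RPC covering whatever has accumulated, e.g.:
 */
#if 0
	nfs_gop_write(vp, pgs1, npgs1, PGO_WEAK);  /* record range only */
	nfs_gop_write(vp, pgs2, npgs2, PGO_WEAK);  /* range grows */
	nfs_gop_write(vp, pgs3, npgs3, 0);         /* one COMMIT for all */
#endif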