/*	$NetBSD: nfs_bio.c,v 1.36 1997/10/19 20:27:32 fvdl Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/dirent.h>

#include <vm/vm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred, cflag)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag, cflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsdircache *ndp = NULL, *nndp = NULL;
	daddr_t lbn, bn, rabn;
	caddr_t baddr, ep, edp;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin, en, enn;
	int enough = 0;
	struct dirent *dp, *pdp;
	off_t curoff = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (vp->v_type != VDIR && uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (vp->v_type != VDIR &&
	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
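	 *
	 * As a compact restatement of the checks done just below:
	 *
	 *	if the file was modified locally (NMODIFIED),
	 *		flush cached data, refetch attributes, save mtime;
	 *	else if the server's mtime differs from the cached one,
	 *		flush cached data and save the new mtime.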
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldircache(vp, 0);
				np->n_direofoffset = 0;
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				if (vp->v_type == VDIR) {
					nfs_invaldircache(vp, 0);
					np->n_direofoffset = 0;
				}
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {

		/*
		 * Get a valid lease. If cached data is stale, flush it.
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, ND_READ)) {
				do {
					error = nqnfs_getlease(vp, ND_READ, cred, p);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						nfs_invaldircache(vp, 0);
						np->n_direofoffset = 0;
					}
					error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
					if (error)
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
				nfs_invaldircache(vp, 0);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				np->n_direofoffset = 0;
				if (error)
					return (error);
			}
		}
		/*
		 * Don't cache symlinks.
		 */
		if (np->n_flag & NQNFSNONCACHE
		    || ((vp->v_flag & VROOT) && vp->v_type == VLNK)) {
			switch (vp->v_type) {
			case VREG:
				return (nfs_readrpc(vp, uio, cred));
			case VLNK:
				return (nfs_readlinkrpc(vp, uio, cred));
			case VDIR:
				break;
			default:
				printf(" NQNFSNONCACHE: type %x unexpected\n",
				    vp->v_type);
			};
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    lbn - 1 == vp->v_lastr) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, cred)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
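			 *
			 * A buffer that already has a write rpc in progress
			 * (B_BUSY|B_WRITEINPROG below) still holds valid data,
			 * so it is read in place; got_buf stays 0 and the
			 * buffer is not released at the bottom of the loop.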
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, cred, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			vp->v_lastr = lbn;
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, cred, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
diragain:
			nfsstats.biocache_readdirs++;
			ndp = nfs_searchdircache(vp, uio->uio_offset,
			    (nmp->nm_flag & NFSMNT_XLATECOOKIE), 0);
			if (!ndp) {
				/*
				 * We've been handed a cookie that is not
				 * in the cache. If we're not translating
				 * 32 <-> 64, it may be a value that was
				 * flushed out of the cache because it grew
				 * too big. Let the server judge if it's
				 * valid or not. In the translation case,
				 * we have no way of validating this value,
				 * so punt.
				 */
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE)
					return (EINVAL);
				ndp = nfs_enterdircache(vp, uio->uio_offset,
				    uio->uio_offset, 0, 0);
			}

			if (uio->uio_offset != 0 &&
			    ndp->dc_cookie == np->n_direofoffset) {
				nfsstats.direofcache_hits++;
				return (0);
			}

			bp = nfs_getcacheblk(vp, ndp->dc_blkno, NFS_DIRBLKSIZ, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				bp->b_dcookie = ndp->dc_blkcookie;
				error = nfs_doio(bp, cred, p);
				if (error) {
					/*
					 * Yuck! The directory has been modified on the
					 * server. Punt and let the userland code
					 * deal with it.
					 */
					brelse(bp);
					if (error == NFSERR_BAD_COOKIE) {
						nfs_invaldircache(vp, 0);
						nfs_vinvalbuf(vp, 0, cred, p, 1);
						error = EINVAL;
					}
					return (error);
				}
			}

			/*
			 * Find the entry we were looking for in the block.
			 */

			en = ndp->dc_entry;

			pdp = dp = (struct dirent *)bp->b_data;
			edp = bp->b_data + bp->b_validend;
			enn = 0;
			while (enn < en && (caddr_t)dp < edp) {
				pdp = dp;
				dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			/*
			 * If the entry number was bigger than the number of
			 * entries in the block, or the cookie of the previous
			 * entry doesn't match, the directory cache is
			 * stale. Flush it and try again (i.e. go to
			 * the server).
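			 *
			 * (The cookie stashed in the previous entry is the
			 * directory offset of the entry being sought, so it
			 * must match the cookie this cache entry was found
			 * under.)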
			 */
			if ((caddr_t)dp >= edp || (caddr_t)dp + dp->d_reclen > edp ||
			    (en > 0 && NFS_GETCOOKIE(pdp) != ndp->dc_cookie)) {
#ifdef DEBUG
				printf("invalid cache: %p %p %p len %u off %lx %lx\n",
				    pdp, dp, edp, dp->d_reclen,
				    (unsigned long)uio->uio_offset,
				    (unsigned long)NFS_GETCOOKIE(pdp));
#endif
				brelse(bp);
				nfs_invaldircache(vp, 0);
				nfs_vinvalbuf(vp, 0, cred, p, 0);
				goto diragain;
			}

			on = (caddr_t)dp - bp->b_data;

			/*
			 * Cache all entries that may be exported to the
			 * user, as they may be thrown back at us. The
			 * NFSBIO_CACHECOOKIES flag indicates that all
			 * entries are being 'exported', so cache them all.
			 */

			if (en == 0 && pdp == dp) {
				dp = (struct dirent *)
				    ((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			if (uio->uio_resid < (bp->b_validend - on)) {
				n = uio->uio_resid;
				enough = 1;
			} else
				n = bp->b_validend - on;

			ep = bp->b_data + on + n;

			/*
			 * Find last complete entry to copy, caching entries
			 * (if requested) as we go.
			 */

			while ((caddr_t)dp < ep && (caddr_t)dp + dp->d_reclen <= ep) {
				if (cflag & NFSBIO_CACHECOOKIES) {
					nndp = nfs_enterdircache(vp, NFS_GETCOOKIE(pdp),
					    ndp->dc_blkcookie, enn, bp->b_lblkno);
					if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
						NFS_STASHCOOKIE32(pdp,
						    nndp->dc_cookie32);
					}
				}
				pdp = dp;
				dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
				enn++;
			}

			/*
			 * If the last requested entry was not the last in the
			 * buffer (happens if NFS_DIRFRAGSIZ < NFS_DIRBLKSIZ),
			 * cache the cookie of the last requested one, and
			 * set the offset to it.
			 */

			if ((on + n) < bp->b_validend) {
				curoff = NFS_GETCOOKIE(pdp);
				nndp = nfs_enterdircache(vp, curoff, ndp->dc_blkcookie,
				    enn, bp->b_lblkno);
				if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
					NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
			} else
				curoff = bp->b_dcookie;

			/*
			 * Always cache the entry for the next block,
			 * so that readaheads can use it.
			 */
			nndp = nfs_enterdircache(vp, bp->b_dcookie, bp->b_dcookie, 0, 0);
			if (nmp->nm_flag & NFSMNT_XLATECOOKIE) {
				if (curoff == bp->b_dcookie) {
					NFS_STASHCOOKIE32(pdp, nndp->dc_cookie32);
					curoff = nndp->dc_cookie32;
				}
			}

			n = ((caddr_t)pdp + pdp->d_reclen) - (bp->b_data + on);

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have the
			 * directory offset cookie of the next block.)
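			 * The cache entry made just above, keyed by
			 * bp->b_dcookie, supplies both the cookie and the
			 * block number that the readahead below passes to
			 * nfs_getcacheblk().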
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    np->n_direofoffset == 0 && !(np->n_flag & NQNFSNONCACHE)) {
				rabp = nfs_getcacheblk(vp, nndp->dc_blkno,
				    NFS_DIRBLKSIZ, p);
				if (rabp) {
					if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
						rabp->b_dcookie = nndp->dc_cookie;
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp, cred)) {
							rabp->b_flags |= B_INVAL;
							brelse(rabp);
						}
					} else
						brelse(rabp);
				}
			}
			got_buf = 1;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		};

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			if (np->n_flag & NQNFSNONCACHE)
				bp->b_flags |= B_INVAL;
			uio->uio_offset = curoff;
			if (enough)
				n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0, iomode, must_commit;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & NFSMNT_NFSV3) &&
	    !(nmp->nm_iflag & NFSMNT_GOTFSINFO))
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		(void)vnode_pager_uncache(vp);

		/*
		 * Check for a valid write lease.
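		 * Under nqnfs, writes may only be cached while a valid
		 * write lease is held; if ours has expired, obtain a new
		 * one, and if the modify revision changed meanwhile,
		 * flush the cached data before continuing.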
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
			iomode = NFSV3WRITE_FILESYNC;
			error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
			if (must_commit)
				nfs_clearcommit(vp->v_mount);
			return (error);
		}
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
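		 * Otherwise, a full block (when not using nqnfs) is
		 * pushed asynchronously, and a partial block is left
		 * as a delayed write.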
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
			if (np->n_flag & NQNFSNONCACHE) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
		} else if ((n + on) == biosize &&
		    (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
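 * (The readahead paths above simply invalidate and release the
 * buffer when this fails, rather than falling back to a
 * synchronous read.)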
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;
	register struct nfsmount *nmp;
	int gotiod, slpflag = 0, slptimeo = 0, error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */

	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			nfs_iodwant[i] = (struct proc *)0;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}
	/*
	 * If none are free, we may already have an iod working on this mount
	 * point. If so, it will process our request.
	 */
	if (!gotiod && nmp->nm_bufqiods > 0)
		gotiod = TRUE;

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2 * nfs_numasync) {
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
			    "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, bp->b_proc))
					return (EINTR);
				if (slpflag == PCATCH) {
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if necessary.
			 */
			if (nmp->nm_bufqiods == 0)
				goto again;
		}

		if (bp->b_flags & B_READ) {
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			bp->b_flags |= B_WRITEINPROG;
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
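		 * The pages were mapped into kernel space by vmapbuf()
		 * before we got here, so a plain read/write rpc on
		 * bp->b_data moves the data.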
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop, cr);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			    NQNFS_CKINVALID(vp, np, ND_READ) &&
			    np->n_lrev != np->n_brev) ||
			    (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			    np->n_mtime != np->n_vattr->va_mtime.tv_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			nfsstats.readdir_bios++;
			uiop->uio_offset = bp->b_dcookie;
			if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
				error = nfs_readdirplusrpc(vp, uiop, cr);
				if (error == NFSERR_NOTSUPP)
					nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
			}
			if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
				error = nfs_readdirrpc(vp, uiop, cr);
			if (!error) {
				bp->b_dcookie = uiop->uio_offset;
				bp->b_validoff = 0;
				bp->b_validend = bp->b_bcount - uiop->uio_resid;
			}
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		};
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
#ifdef fvdl_debug
		printf("nfs_doio(%x): bp %x doff %d dend %d\n",
		    vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
#endif
		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
		if (!error && iomode == NFSV3WRITE_UNSTABLE)
			bp->b_flags |= B_NEEDCOMMIT;
		else
			bp->b_flags &= ~B_NEEDCOMMIT;
		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			bp->b_flags |= B_DELWRI;

			/*
			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
			 * buffer to the clean list, we have to reassign it back to the
			 * dirty one. Ugh.
			 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else if (error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}