/*	$NetBSD: nfs_bio.c,v 1.20 1995/03/18 05:49:51 gwr Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

struct buf *incore(), *nfs_getcacheblk();
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf, nra, error = 0, n, on, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need
	 * current attributes, this can be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

		/*
		 * Get a valid lease. If cached data is stale, flush it.
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
				do {
					error = nqnfs_getlease(vp, NQL_READ, cred, p);
				} while (error == NQNFS_EXPIRED);
				if (error)
					return (error);
				if (np->n_lrev != np->n_brev ||
				    (np->n_flag & NQNFSNONCACHE) ||
				    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						np->n_direofoffset = 0;
						cache_purge(vp);
					}
					if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
						return (error);
					np->n_brev = np->n_lrev;
				}
			} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
				np->n_direofoffset = 0;
				cache_purge(vp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
		}
		if (np->n_flag & NQNFSNONCACHE) {
			switch (vp->v_type) {
			case VREG:
				error = nfs_readrpc(vp, uio, cred);
				break;
			case VLNK:
				error = nfs_readlinkrpc(vp, uio, cred);
				break;
			case VDIR:
				error = nfs_readdirrpc(vp, uio, cred);
				break;
			}
			return (error);
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
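			/*
			 * A worked example of the block arithmetic above,
			 * assuming biosize is 8192 and DEV_BSIZE is 512:
			 * uio_offset 20000 gives lbn 2 (the third 8K
			 * block), on 3616 (the offset within that block),
			 * and bn 32 (the block's address in 512 byte units).
			 */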
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    lbn == vp->v_lastr + 1) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp, cred)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					if (error = nfs_doio(bp, cred, p)) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAL;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			vp->v_lastr = lbn;
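			/*
			 * Clip the transfer once more, this time to the
			 * valid region of the buffer: bytes at and beyond
			 * b_validend were never read from the server.
			 */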
			diff = (on >= bp->b_validend) ? 0 :
			    (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		case VDIR:
			if (uio->uio_resid < NFS_DIRBLKSIZ)
				return (0);
			nfsstats.biocache_readdirs++;
			bn = (daddr_t)uio->uio_offset;
			bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}

			/*
			 * If not eof and read aheads are enabled, start one.
			 * (You need the current block first, so that you have
			 * the directory offset cookie of the next block.)
			 */
			rabn = bp->b_blkno;
			if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
			    rabn != 0 && rabn != np->n_direofoffset &&
			    !incore(vp, rabn)) {
				rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
				if (rabp) {
					if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
						rabp->b_flags |= (B_READ | B_ASYNC);
						if (nfs_asyncio(rabp, cred)) {
							rabp->b_flags |= B_INVAL;
							brelse(rabp);
						}
					} else
						brelse(rabp);
				}
			}
			on = 0;
			n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
			got_buf = 1;
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			if (n + on == biosize || uio->uio_offset == np->n_size)
				bp->b_flags |= B_AGE;
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			uio->uio_offset = bp->b_blkno;
			break;
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize, so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		(void)vnode_pager_uncache(vp);

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
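		/*
		 * If the write extends the file, grow the cached size
		 * now and tell the VM pager, so that mmap'ed views of
		 * the vnode see the new length.
		 */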
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		if (error = uiomove((char *)bp->b_data + on, n, uio)) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
#ifndef notdef
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
		if (ioflag & IO_APPEND)
			bp->b_flags |= B_APPENDWRITE;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
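		/*
		 * Otherwise a completely filled block is pushed
		 * asynchronously with bawrite(), while a partially
		 * filled one is delayed with bdwrite() in the hope
		 * that the rest of the block arrives soon.
		 */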
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			if (error = VOP_BWRITE(bp))
				return (error);
		} else if ((n + on) == biosize &&
		    (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else
			bdwrite(bp);
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;

	if (nfs_numasync == 0)
		return (EIO);
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if (bp->b_flags & B_READ) {
				if (bp->b_rcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_rcred = cred;
				}
			} else {
				if (bp->b_wcred == NOCRED && cred != NOCRED) {
					crhold(cred);
					bp->b_wcred = cred;
				}
			}

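			/*
			 * Hand the buffer to the idle nfsiod and wake it;
			 * the daemon dequeues it from nfs_bufq and performs
			 * the actual rpc through nfs_doio() below.
			 */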
			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error, diff, len;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop, cr);
		} else {
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, cr, 0);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop, cr);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the
					 * file and no writes after the hole
					 * have been pushed to the server yet.
					 * Just zero fill the rest of the
					 * valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - (bp->b_blkno * DEV_BSIZE
						+ diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
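			/*
			 * If the file is executing as a text image and its
			 * cached contents are now known to be stale, the
			 * process cannot safely continue running it, so
			 * kill the process.
			 */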
			if (p && (vp->v_flag & VTEXT) &&
			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
			    NQNFS_CKINVALID(vp, np, NQL_READ) &&
			    np->n_lrev != np->n_brev) ||
			    (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			    np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = 0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, cr);
			break;
		case VDIR:
			uiop->uio_offset = bp->b_lblkno;
			nfsstats.readdir_bios++;
			if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
				error = nfs_readdirlookrpc(vp, uiop, cr);
			else
				error = nfs_readdirrpc(vp, uiop, cr);
			/*
			 * Save offset cookie in b_blkno.
			 */
			bp->b_blkno = uiop->uio_offset;
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
			- bp->b_dirtyoff;
		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
			+ bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		if (bp->b_flags & B_APPENDWRITE)
			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
		else
			error = nfs_writerpc(vp, uiop, cr, 0);
		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR; report the interruption by setting B_EINTR instead.
		 * For the B_ASYNC case, B_EINTR is not relevant, so the rpc
		 * attempt is essentially a noop.
		 */
		if (error == EINTR) {
			bp->b_flags &= ~B_INVAL;
			bp->b_flags |= B_DELWRI;

			/*
			 * Since for the B_ASYNC case, nfs_bwrite() has
			 * reassigned the buffer to the clean list, we have
			 * to reassign it back to the dirty one. Ugh.
			 */
			if (bp->b_flags & B_ASYNC)
				reassignbuf(bp, vp);
			else
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	biodone(bp);
	return (error);
}