/*	$NetBSD: nfs_bio.c,v 1.17 1994/07/20 04:16:01 mycroft Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

struct buf *incore(), *nfs_getcacheblk();
extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff;
	struct buf *bp, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf, nra, error = 0, n, on, not_readin;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	p = uio->uio_procp;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time", in which case the flush can
	 * be skipped for regular files.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need
	 * current attributes this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
			}
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
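	/*
	 * A worked example of the block arithmetic used for VREG reads in
	 * the loop below, assuming a typical nm_rsize (biosize) of 8192
	 * and DEV_BSIZE of 512: for uio_offset 20000,
	 *	lbn = 20000 / 8192       = 2	(logical block of the file)
	 *	on  = 20000 & (8192 - 1) = 3616	(byte offset within block)
	 *	bn  = 2 * (8192 / 512)   = 32	(buffer cache block number)
	 * The mask computing "on" only works because biosize is a power
	 * of two.
	 */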
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
		    do {
			error = nqnfs_getlease(vp, NQL_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR) {
			    np->n_direofoffset = 0;
			    cache_purge(vp);
			}
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    np->n_direofoffset = 0;
		    cache_purge(vp);
		    if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
			return (error);
		}
	    }
	    if (np->n_flag & NQNFSNONCACHE) {
		switch (vp->v_type) {
		case VREG:
			error = nfs_readrpc(vp, uio, cred);
			break;
		case VLNK:
			error = nfs_readlinkrpc(vp, uio, cred);
			break;
		case VDIR:
			error = nfs_readdirrpc(vp, uio, cred);
			break;
		}
		return (error);
	    }
	    baddr = (caddr_t)0;
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		bn = lbn * (biosize / DEV_BSIZE);
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    lbn == vp->v_lastr + 1) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			 (lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    }
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 */
		if ((bp = incore(vp, bn)) &&
		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
		    (B_BUSY | B_WRITEINPROG))
			got_buf = 0;
		else {
again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				if (error = nfs_doio(bp, cred, p)) {
					brelse(bp);
					return (error);
				}
			}
		}
		n = min((unsigned)(biosize - on), uio->uio_resid);
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
		if (not_readin && n > 0) {
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				if (!got_buf) {
					bp = nfs_getcacheblk(vp, bn, biosize, p);
					if (!bp)
						return (EINTR);
					got_buf = 1;
				}
				bp->b_flags |= B_INVAL;
				if (bp->b_dirtyend > 0) {
					if ((bp->b_flags & B_DELWRI) == 0)
						panic("nfsbioread");
					if (VOP_BWRITE(bp) == EINTR)
						return (EINTR);
				} else
					brelse(bp);
				goto again;
			}
		}
		vp->v_lastr = lbn;
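		/*
		 * Clamp the transfer to the part of the buffer that is
		 * known to hold valid data; b_validoff/b_validend are set
		 * by nfs_doio() on read and extended by nfs_write() below.
		 */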
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (error = nfs_doio(bp, cred, p)) {
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		got_buf = 1;
		on = 0;
		break;
	    case VDIR:
		nfsstats.biocache_readdirs++;
		bn = (daddr_t)uio->uio_offset;
		bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (error = nfs_doio(bp, cred, p)) {
				brelse(bp);
				return (error);
			}
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 * directory offset cookie of the next block.)
		 */
		rabn = bp->b_blkno;
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    rabn != 0 && rabn != np->n_direofoffset &&
		    !incore(vp, rabn)) {
			rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    }
			}
		}
		on = 0;
		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
		got_buf = 1;
		break;
	    }

	    if (n > 0) {
		if (!baddr)
			baddr = bp->b_data;
		error = uiomove(baddr + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		if (n + on == biosize || uio->uio_offset == np->n_size)
			bp->b_flags |= B_AGE;
		break;
	    case VLNK:
		n = 0;
		break;
	    case VDIR:
		/* nfs_doio() saved the next directory cookie in b_blkno */
		uio->uio_offset = bp->b_blkno;
		break;
	    }
	    if (got_buf)
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
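	/*
	 * For append or synchronous writes, the cached image must first be
	 * brought up to date: flush any modified buffers, and for IO_APPEND
	 * force a fresh getattr so that uio_offset is set from the server's
	 * notion of the file size rather than from stale cached attributes.
	 */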
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		(void)vnode_pager_uncache(vp);

		/*
		 * Check for a valid write lease.
		 * If non-cachable, just do the rpc.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, ioflag));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		if (error = uiomove((char *)bp->b_data + on, n, uio)) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
#ifndef notdef
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}
#else
		bp->b_validoff = bp->b_dirtyoff;
		bp->b_validend = bp->b_dirtyend;
#endif
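		/*
		 * Example of the dirty-region bookkeeping above, assuming
		 * an 8k buffer: a write covering bytes [1024, 2048) followed
		 * by one covering [1536, 4096) coalesces to b_dirtyoff = 1024,
		 * b_dirtyend = 4096. A later write at [6144, 7168) is not
		 * contiguous with that region, so the old dirty area is first
		 * pushed to the server with VOP_BWRITE() (the "goto again"
		 * above) before the new range is recorded.
		 */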
528 */ 529 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 530 bp->b_proc = p; 531 if (error = VOP_BWRITE(bp)) 532 return (error); 533 } else if ((n + on) == biosize && 534 (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 535 bp->b_proc = (struct proc *)0; 536 bawrite(bp); 537 } else 538 bdwrite(bp); 539 } while (uio->uio_resid > 0 && n > 0); 540 return (0); 541 } 542 543 /* 544 * Get an nfs cache block. 545 * Allocate a new one if the block isn't currently in the cache 546 * and return the block marked busy. If the calling process is 547 * interrupted by a signal for an interruptible mount point, return 548 * NULL. 549 */ 550 struct buf * 551 nfs_getcacheblk(vp, bn, size, p) 552 struct vnode *vp; 553 daddr_t bn; 554 int size; 555 struct proc *p; 556 { 557 register struct buf *bp; 558 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 559 560 if (nmp->nm_flag & NFSMNT_INT) { 561 bp = getblk(vp, bn, size, PCATCH, 0); 562 while (bp == (struct buf *)0) { 563 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 564 return ((struct buf *)0); 565 bp = getblk(vp, bn, size, 0, 2 * hz); 566 } 567 } else 568 bp = getblk(vp, bn, size, 0, 0); 569 return (bp); 570 } 571 572 /* 573 * Flush and invalidate all dirty buffers. If another process is already 574 * doing the flush, just wait for completion. 575 */ 576 nfs_vinvalbuf(vp, flags, cred, p, intrflg) 577 struct vnode *vp; 578 int flags; 579 struct ucred *cred; 580 struct proc *p; 581 int intrflg; 582 { 583 register struct nfsnode *np = VTONFS(vp); 584 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 585 int error = 0, slpflag, slptimeo; 586 587 if ((nmp->nm_flag & NFSMNT_INT) == 0) 588 intrflg = 0; 589 if (intrflg) { 590 slpflag = PCATCH; 591 slptimeo = 2 * hz; 592 } else { 593 slpflag = 0; 594 slptimeo = 0; 595 } 596 /* 597 * First wait for any other process doing a flush to complete. 598 */ 599 while (np->n_flag & NFLUSHINPROG) { 600 np->n_flag |= NFLUSHWANT; 601 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", 602 slptimeo); 603 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) 604 return (EINTR); 605 } 606 607 /* 608 * Now, flush as required. 609 */ 610 np->n_flag |= NFLUSHINPROG; 611 error = vinvalbuf(vp, flags, cred, p, slpflag, 0); 612 while (error) { 613 if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { 614 np->n_flag &= ~NFLUSHINPROG; 615 if (np->n_flag & NFLUSHWANT) { 616 np->n_flag &= ~NFLUSHWANT; 617 wakeup((caddr_t)&np->n_flag); 618 } 619 return (EINTR); 620 } 621 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); 622 } 623 np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); 624 if (np->n_flag & NFLUSHWANT) { 625 np->n_flag &= ~NFLUSHWANT; 626 wakeup((caddr_t)&np->n_flag); 627 } 628 return (0); 629 } 630 631 /* 632 * Initiate asynchronous I/O. Return an error if no nfsiods are available. 633 * This is mainly to avoid queueing async I/O requests when the nfsiods 634 * are all hung on a dead server. 
635 */ 636 nfs_asyncio(bp, cred) 637 register struct buf *bp; 638 struct ucred *cred; 639 { 640 register int i; 641 642 if (nfs_numasync == 0) 643 return (EIO); 644 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 645 if (nfs_iodwant[i]) { 646 if (bp->b_flags & B_READ) { 647 if (bp->b_rcred == NOCRED && cred != NOCRED) { 648 crhold(cred); 649 bp->b_rcred = cred; 650 } 651 } else { 652 if (bp->b_wcred == NOCRED && cred != NOCRED) { 653 crhold(cred); 654 bp->b_wcred = cred; 655 } 656 } 657 658 TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); 659 nfs_iodwant[i] = (struct proc *)0; 660 wakeup((caddr_t)&nfs_iodwant[i]); 661 return (0); 662 } 663 return (EIO); 664 } 665 666 /* 667 * Do an I/O operation to/from a cache block. This may be called 668 * synchronously or from an nfsiod. 669 */ 670 int 671 nfs_doio(bp, cr, p) 672 register struct buf *bp; 673 struct cred *cr; 674 struct proc *p; 675 { 676 register struct uio *uiop; 677 register struct vnode *vp; 678 struct nfsnode *np; 679 struct nfsmount *nmp; 680 int error, diff, len; 681 struct uio uio; 682 struct iovec io; 683 684 vp = bp->b_vp; 685 np = VTONFS(vp); 686 nmp = VFSTONFS(vp->v_mount); 687 uiop = &uio; 688 uiop->uio_iov = &io; 689 uiop->uio_iovcnt = 1; 690 uiop->uio_segflg = UIO_SYSSPACE; 691 uiop->uio_procp = p; 692 693 /* 694 * Historically, paging was done with physio, but no more... 695 */ 696 if (bp->b_flags & B_PHYS) { 697 /* 698 * ...though reading /dev/drum still gets us here. 699 */ 700 io.iov_len = uiop->uio_resid = bp->b_bcount; 701 /* mapping was done by vmapbuf() */ 702 io.iov_base = bp->b_data; 703 uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 704 if (bp->b_flags & B_READ) { 705 uiop->uio_rw = UIO_READ; 706 nfsstats.read_physios++; 707 error = nfs_readrpc(vp, uiop, cr); 708 } else { 709 uiop->uio_rw = UIO_WRITE; 710 nfsstats.write_physios++; 711 error = nfs_writerpc(vp, uiop, cr); 712 } 713 if (error) { 714 bp->b_flags |= B_ERROR; 715 bp->b_error = error; 716 } 717 } else if (bp->b_flags & B_READ) { 718 io.iov_len = uiop->uio_resid = bp->b_bcount; 719 io.iov_base = bp->b_data; 720 uiop->uio_rw = UIO_READ; 721 switch (vp->v_type) { 722 case VREG: 723 uiop->uio_offset = bp->b_blkno * DEV_BSIZE; 724 nfsstats.read_bios++; 725 error = nfs_readrpc(vp, uiop, cr); 726 if (!error) { 727 bp->b_validoff = 0; 728 if (uiop->uio_resid) { 729 /* 730 * If len > 0, there is a hole in the file and 731 * no writes after the hole have been pushed to 732 * the server yet. 733 * Just zero fill the rest of the valid area. 
734 */ 735 diff = bp->b_bcount - uiop->uio_resid; 736 len = np->n_size - (bp->b_blkno * DEV_BSIZE 737 + diff); 738 if (len > 0) { 739 len = min(len, uiop->uio_resid); 740 bzero((char *)bp->b_data + diff, len); 741 bp->b_validend = diff + len; 742 } else 743 bp->b_validend = diff; 744 } else 745 bp->b_validend = bp->b_bcount; 746 } 747 if (p && (vp->v_flag & VTEXT) && 748 (((nmp->nm_flag & NFSMNT_NQNFS) && 749 NQNFS_CKINVALID(vp, np, NQL_READ) && 750 np->n_lrev != np->n_brev) || 751 (!(nmp->nm_flag & NFSMNT_NQNFS) && 752 np->n_mtime != np->n_vattr.va_mtime.ts_sec))) { 753 uprintf("Process killed due to text file modification\n"); 754 psignal(p, SIGKILL); 755 p->p_holdcnt++; 756 } 757 break; 758 case VLNK: 759 uiop->uio_offset = 0; 760 nfsstats.readlink_bios++; 761 error = nfs_readlinkrpc(vp, uiop, cr); 762 break; 763 case VDIR: 764 uiop->uio_offset = bp->b_lblkno; 765 nfsstats.readdir_bios++; 766 if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) 767 error = nfs_readdirlookrpc(vp, uiop, cr); 768 else 769 error = nfs_readdirrpc(vp, uiop, cr); 770 /* 771 * Save offset cookie in b_blkno. 772 */ 773 bp->b_blkno = uiop->uio_offset; 774 break; 775 }; 776 if (error) { 777 bp->b_flags |= B_ERROR; 778 bp->b_error = error; 779 } 780 } else { 781 io.iov_len = uiop->uio_resid = bp->b_dirtyend 782 - bp->b_dirtyoff; 783 uiop->uio_offset = (bp->b_blkno * DEV_BSIZE) 784 + bp->b_dirtyoff; 785 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; 786 uiop->uio_rw = UIO_WRITE; 787 nfsstats.write_bios++; 788 if (bp->b_flags & B_APPENDWRITE) 789 error = nfs_writerpc(vp, uiop, cr, IO_APPEND); 790 else 791 error = nfs_writerpc(vp, uiop, cr, 0); 792 bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE); 793 794 /* 795 * For an interrupted write, the buffer is still valid and the 796 * write hasn't been pushed to the server yet, so we can't set 797 * B_ERROR and report the interruption by setting B_EINTR. For 798 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt 799 * is essentially a noop. 800 */ 801 if (error == EINTR) { 802 bp->b_flags &= ~B_INVAL; 803 bp->b_flags |= B_DELWRI; 804 805 /* 806 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the 807 * buffer to the clean list, we have to reassign it back to the 808 * dirty one. Ugh. 809 */ 810 if (bp->b_flags & B_ASYNC) 811 reassignbuf(bp, vp); 812 else 813 bp->b_flags |= B_EINTR; 814 } else { 815 if (error) { 816 bp->b_flags |= B_ERROR; 817 bp->b_error = np->n_error = error; 818 np->n_flag |= NWRITEERR; 819 } 820 bp->b_dirtyoff = bp->b_dirtyend = 0; 821 } 822 } 823 bp->b_resid = uiop->uio_resid; 824 biodone(bp); 825 return (error); 826 } 827