/*	$OpenBSD: nfs_bio.c,v 1.38 2003/06/02 23:28:19 millert Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>

#include <uvm/uvm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;
struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	struct vnode *vp;
	struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
	if (np->n_flag & NMODIFIED) {
		np->n_attrstamp = 0;
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime.tv_sec;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (np->n_mtime != vattr.va_mtime.tv_sec) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;
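			/*
			 * e.g., with biosize = 8192 and DEV_BSIZE = 512
			 * (typical values), an offset of 20480 gives
			 * lbn = 2 (the logical 8k block), on = 4096 (the
			 * byte offset within it) and bn = 32 (the same
			 * block in DEV_BSIZE units, which is what
			 * incore()/getblk() expect).  Since biosize is a
			 * power of two, the & (biosize - 1) mask is just
			 * a cheap modulo.
			 */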

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
					rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp, rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |= (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

			/*
			 * If the block is in the cache and has the required data
			 * in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff < n)
				n = diff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff || (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn, biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
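			/*
			 * Clamp the copy to the buffer's valid window:
			 * b_validoff..b_validend bound the portion of the
			 * buffer known to hold valid data, so copying past
			 * b_validend here would hand out stale or
			 * uninitialized buffer contents.
			 */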
			diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
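		/*
		 * e.g., with a dirty range of [0, 512) already recorded,
		 * a new write at on = 1024, n = 512 would leave bytes
		 * 512-1023 clean between two dirty spans; since the buffer
		 * header can describe only the single range
		 * [b_dirtyoff, b_dirtyend), the old range must be pushed
		 * to the server first.
		 */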
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If IO_SYNC was requested, do a synchronous bwrite().
		 */
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

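	/*
	 * For interruptible mounts, have getblk() sleep interruptibly
	 * (PCATCH); when it comes back empty-handed, check for a pending
	 * signal and otherwise keep retrying with a 2 * hz timeout, so a
	 * process blocked against a dead server stays killable.
	 */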
	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
		    slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp)
	struct buf *bp;
{
	int i, s;

	if (nfs_numasync == 0)
		return (EIO);
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			if ((bp->b_flags & B_READ) == 0) {
				bp->b_flags |= B_WRITEINPROG;
			}

			TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
			nfs_iodwant[i] = (struct proc *)0;
			wakeup((caddr_t)&nfs_iodwant[i]);
			return (0);
		}

	/*
	 * If it is a read, or a write that is already marked B_WRITEINPROG
	 * or B_NOCACHE, return EIO so the process will call nfs_doio() and
	 * do it synchronously.
	 */
	if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
		return (EIO);

	/*
	 * Just turn the async write into a delayed write, instead of
	 * doing it synchronously. Hopefully, at least one of the nfsiods
	 * is currently doing a write for this file and will pick up the
	 * delayed writes before going back to sleep.
	 */
	s = splbio();
	buf_dirty(bp);
	biodone(bp);
	splx(s);
	return (0);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
		/*
		 * ...though reading /dev/drum still gets us here.
		 */
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the file and
					 * no writes after the hole have been pushed to
					 * the server yet.
					 * Just zero fill the rest of the valid area.
					 */
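					/*
					 * e.g., with b_bcount = 8192 and a
					 * short read of 4096 bytes
					 * (uio_resid = 4096): diff = 4096;
					 * if n_size says another 1024 bytes
					 * exist past the data returned,
					 * len = 1024, bytes 4096-5119 are
					 * zeroed and b_validend = 5120;
					 * if the short read was just EOF,
					 * len <= 0 and b_validend = 4096.
					 */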
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff, len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
				p->p_holdcnt++;
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
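		/*
		 * NFSv3 offers two write stability levels here: UNSTABLE
		 * lets the server reply before the data reaches stable
		 * storage (a later commit rpc is then required, hence
		 * B_NEEDCOMMIT below), while FILESYNC makes the server
		 * commit before replying.  Async writes take the cheap
		 * path; everything else is written FILESYNC.
		 */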
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
#ifdef fvdl_debug
		printf("nfs_doio(%x): bp %x doff %d dend %d\n",
		    vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
#endif
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		if (!error && iomode == NFSV3WRITE_UNSTABLE)
			bp->b_flags |= B_NEEDCOMMIT;
		else
			bp->b_flags &= ~B_NEEDCOMMIT;
		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR. For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 * For the case of a V3 write rpc not being committed to stable
		 * storage, the block is still dirty and requires either a commit
		 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
		 * before the block is reused. This is indicated by setting the
		 * B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}