/*	$OpenBSD: nfs_bio.c,v 1.67 2009/09/02 18:20:54 thib Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/queue.h>
#include <sys/time.h>

#include <uvm/uvm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;
uint32_t nfs_bufqmax, nfs_bufqlen;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 */
	if (np->n_flag & NMODIFIED) {
		NFS_INVALIDATE_ATTRCACHE(np);
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (timespeccmp(&np->n_mtime, &vattr.va_mtime, !=)) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
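			 * A read-ahead is attempted only when asynchronous
			 * nfsiods are running (nfs_numasync > 0) and the
			 * mount asked for it (nm_readahead > 0).  Blocks
			 * already incore are skipped, and if the hand-off to
			 * an nfsiod fails the buffer is invalidated so a
			 * stale block is never left behind.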
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size;
				    nra++) {
					rabn = (lbn + 1 + nra) *
					    (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp,
						    rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags &
						    (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |=
							    (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp)) {
								rabp->b_flags |= B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}

again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			offdiff = np->n_size - uio->uio_offset;
			if (offdiff < (off_t)n)
				n = (int)offdiff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff ||
				    (on + n) > bp->b_validend) {
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			diff = (on >= bp->b_validend) ? 0 :
			    (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
int
nfs_write(void *v)
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			NFS_INVALIDATE_ATTRCACHE(np);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
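			/*
			 * Appending: refetch the attributes so the write
			 * offset comes from the server's current idea of
			 * the file size rather than a stale cached size.
			 */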
			NFS_INVALIDATE_ATTRCACHE(np);
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */

		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;

		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			bp->b_proc = NULL;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

	return (0);
}
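
/*
 * Note on the write-back choices above: an IO_SYNC write goes out
 * immediately via VOP_BWRITE(), a completely filled block is pushed
 * asynchronously via nfs_writebp(), and a partially filled block is
 * left as a delayed write with bdwrite() so that later writes get a
 * chance to fill in the rest of the block first.
 */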

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(struct vnode *vp, daddr64_t bn, int size, struct proc *p)
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == NULL) {
			if (nfs_sigintr(nmp, NULL, p))
				return (NULL);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *np = VTONFS(vp);
	int error, sintr, stimeo;

	error = sintr = stimeo = 0;

	if (ISSET(nmp->nm_flag, NFSMNT_INT)) {
		sintr = PCATCH;
		stimeo = 2 * hz;
	}

	/* First wait for any other process doing a flush to complete. */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep(&np->n_flag, PRIBIO|sintr, "nfsvinval",
		    stimeo);
		if (error && sintr && nfs_sigintr(nmp, NULL, p))
			return (EINTR);
	}

	/* Now, flush as required. */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, sintr, 0);
	while (error) {
		if (sintr && nfs_sigintr(nmp, NULL, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup(&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, stimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(struct buf *bp)
{
	if (nfs_numasync == 0)
		goto out;

	if (nfs_bufqlen > nfs_bufqmax)
		goto out;	/* too many bufs in use, force sync */

	if ((bp->b_flags & B_READ) == 0) {
		bp->b_flags |= B_WRITEINPROG;
	}

	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
	nfs_bufqlen++;

	wakeup_one(&nfs_bufq);
	return (0);

out:
	nfsstats.forcedsync++;
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(struct buf *bp, struct proc *p)
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more.
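	 * A B_PHYS buffer arrives here with its pages already mapped by
	 * vmapbuf(), so the read or write rpc can be issued directly on
	 * bp->b_data.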
	 */
	if (bp->b_flags & B_PHYS) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the
					 * file and no writes after the hole
					 * have been pushed to the server yet.
					 * Just zero fill the rest of the
					 * valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size -
					    ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff,
						    len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (timespeccmp(&np->n_mtime,
			    &np->n_vattr.va_mtime, !=))) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		bcstats.pendingwrites++;
		bcstats.numwrites++;
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE))
		    == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

		rw_enter_write(&np->n_commitlock);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
			bp->b_flags |= B_NEEDCOMMIT;
			nfs_add_tobecommitted_range(vp, bp);
		} else {
			bp->b_flags &= ~B_NEEDCOMMIT;
			nfs_del_committed_range(vp, bp);
		}
		rw_exit_write(&np->n_commitlock);

		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR; instead we report the interruption by setting
		 * B_EINTR. For the B_ASYNC case, B_EINTR is not relevant, so
		 * the rpc attempt is essentially a noop.
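		 * Either way the buffer is redirtied below so that the data
		 * is written again later; only a synchronous caller sees
		 * B_EINTR.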
		 * For the case of a V3 write rpc not being committed to
		 * stable storage, the block is still dirty and requires
		 * either a commit rpc or another write rpc with iomode ==
		 * NFSV3WRITE_FILESYNC before the block is reused. This is
		 * indicated by setting the B_DELWRI and B_NEEDCOMMIT flags.
		 */
		if (error == EINTR ||
		    (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}