/*	$OpenBSD: nfs_bio.c,v 1.55 2008/08/09 10:14:02 thib Exp $	*/
/*	$NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/queue.h>

#include <uvm/uvm_extern.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;
extern struct nfsstats nfsstats;
struct nfs_bufqhead nfs_bufq;
uint32_t nfs_bufqmax, nfs_bufqlen;

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	struct vnode *vp;
	struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	struct nfsnode *np = VTONFS(vp);
	int biosize, diff;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
	off_t offdiff;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date.  If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
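	/*
	 * Illustrative timeline for the consistency window described
	 * above (a sketch with assumed figures, not code): a client
	 * caches data at t=0 along with the attributes returned with it.
	 * If the server's copy changes at t=1, the VOP_GETATTR() below
	 * may still be satisfied from the attribute cache until
	 * n_attrstamp expires, so stale data can be served for up to
	 * NFS_ATTRTIMEO seconds before the mtime change is noticed and
	 * nfs_vinvalbuf() is called.
	 */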
	if (np->n_flag & NMODIFIED) {
		np->n_attrstamp = 0;
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		np->n_mtime = vattr.va_mtime.tv_sec;
	} else {
		error = VOP_GETATTR(vp, &vattr, cred, p);
		if (error)
			return (error);
		if (np->n_mtime != vattr.va_mtime.tv_sec) {
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		}
	}

	/*
	 * update the cache read creds for this vnode
	 */
	if (np->n_rcred)
		crfree(np->n_rcred);
	np->n_rcred = cred;
	crhold(cred);

	do {
		if ((vp->v_flag & VROOT) && vp->v_type == VLNK) {
			return (nfs_readlinkrpc(vp, uio, cred));
		}
		baddr = (caddr_t)0;
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize - 1);
			bn = lbn * (biosize / DEV_BSIZE);
			not_readin = 1;

			/*
			 * Start the read ahead(s), as required.
			 */
			if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
				for (nra = 0; nra < nmp->nm_readahead &&
				    (lbn + 1 + nra) * biosize < np->n_size;
				    nra++) {
					rabn = (lbn + 1 + nra) *
					    (biosize / DEV_BSIZE);
					if (!incore(vp, rabn)) {
						rabp = nfs_getcacheblk(vp,
						    rabn, biosize, p);
						if (!rabp)
							return (EINTR);
						if ((rabp->b_flags &
						    (B_DELWRI | B_DONE)) == 0) {
							rabp->b_flags |=
							    (B_READ | B_ASYNC);
							if (nfs_asyncio(rabp)) {
								rabp->b_flags |=
								    B_INVAL;
								brelse(rabp);
							}
						} else
							brelse(rabp);
					}
				}
			}
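			/*
			 * Worked example of the block arithmetic above
			 * (illustrative figures only, assuming
			 * biosize = 8192 and DEV_BSIZE = 512): a read at
			 * uio_offset 20000 gives
			 *	lbn = 20000 / 8192       = 2
			 *	on  = 20000 & (8192 - 1) = 3616
			 *	bn  = 2 * (8192 / 512)   = 32
			 * and with nm_readahead = 2 the loop above would
			 * prefetch logical blocks 3 and 4, provided they
			 * lie below n_size and are not already incore.
			 */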
			/*
			 * If the block is in the cache and has the required
			 * data in a valid region, just copy it out.
			 * Otherwise, get the block and write back/read in,
			 * as required.
			 */
			if ((bp = incore(vp, bn)) &&
			    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
			    (B_BUSY | B_WRITEINPROG))
				got_buf = 0;
			else {
again:
				bp = nfs_getcacheblk(vp, bn, biosize, p);
				if (!bp)
					return (EINTR);
				got_buf = 1;
				if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
					bp->b_flags |= B_READ;
					not_readin = 0;
					error = nfs_doio(bp, p);
					if (error) {
						brelse(bp);
						return (error);
					}
				}
			}
			n = min((unsigned)(biosize - on), uio->uio_resid);
			offdiff = np->n_size - uio->uio_offset;
			if (offdiff < (off_t)n)
				n = (int)offdiff;
			if (not_readin && n > 0) {
				if (on < bp->b_validoff ||
				    (on + n) > bp->b_validend) {
					if (!got_buf) {
						bp = nfs_getcacheblk(vp, bn,
						    biosize, p);
						if (!bp)
							return (EINTR);
						got_buf = 1;
					}
					bp->b_flags |= B_INVAFTERWRITE;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						if (VOP_BWRITE(bp) == EINTR)
							return (EINTR);
					} else
						brelse(bp);
					goto again;
				}
			}
			diff = (on >= bp->b_validend) ? 0 :
			    (bp->b_validend - on);
			if (diff < n)
				n = diff;
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			bp = nfs_getcacheblk(vp, 0, NFS_MAXPATHLEN, p);
			if (!bp)
				return (EINTR);
			if ((bp->b_flags & B_DONE) == 0) {
				bp->b_flags |= B_READ;
				error = nfs_doio(bp, p);
				if (error) {
					brelse(bp);
					return (error);
				}
			}
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			got_buf = 1;
			on = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
			break;
		}

		if (n > 0) {
			if (!baddr)
				baddr = bp->b_data;
			error = uiomove(baddr + on, (int)n, uio);
		}
		switch (vp->v_type) {
		case VREG:
			break;
		case VLNK:
			n = 0;
			break;
		default:
			printf(" nfsbioread: type %x unexpected\n", vp->v_type);
		}
		if (got_buf)
			brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}
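/*
 * Illustrative note on the b_validoff/b_validend handling above (a
 * sketch with assumed figures, not code): a block may be cached with
 * only part of its contents valid, e.g. validoff = 0, validend = 4096
 * in an 8192-byte block after a short read.  A request for bytes
 * [4096, 6000) of that block fails the window test (on + n > validend),
 * so any old dirty region is first pushed out with VOP_BWRITE() and the
 * "again" path re-reads the block before the uiomove().
 */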
/*
 * Vnode op for write using bio
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args *ap = v;
	int biosize;
	struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr64_t lbn, bn;
	int n, on, error = 0, extended = 0, wrotedta = 0, truncated = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * update the cache write creds for this node.
	 */
	if (np->n_wcred)
		crfree(np->n_wcred);
	np->n_wcred = cred;
	crhold(cred);

	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem.  nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		uvm_vnp_uncache(vp);

		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			uvm_vnp_setsize(vp, (u_long)np->n_size);
			extended = 1;
		} else if (uio->uio_offset + n < np->n_size)
			truncated = 1;

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		wrotedta = 1;

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		if (NFS_ISV3(vp)) {
			rw_enter_write(&np->n_commitlock);
			if (bp->b_flags & B_NEEDCOMMIT) {
				bp->b_flags &= ~B_NEEDCOMMIT;
				nfs_del_tobecommitted_range(vp, bp);
			}
			nfs_del_committed_range(vp, bp);
			rw_exit_write(&np->n_commitlock);
		} else
			bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cacheable or IO_SYNC, do bwrite().
		 */
		if (ioflag & IO_SYNC) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
		} else if ((n + on) == biosize) {
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);

	if (wrotedta)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0) |
		    (truncated ? NOTE_TRUNCATE : 0));

	return (0);
}
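/*
 * Illustrative walk-through of the dirty-region bookkeeping in
 * nfs_write() above (assumed figures, biosize = 8192): a first write
 * of [0, 4096) sets b_dirtyoff = 0, b_dirtyend = 4096.  A second write
 * of [4096, 6000) is contiguous, so the region is merely extended to
 * b_dirtyend = 6000.  A third write of [7000, 8000), however, has
 * on > b_dirtyend and would leave a hole of unknown data, so the old
 * dirty area is pushed out with VOP_BWRITE() first and the write is
 * retried via the "again" label.
 */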
/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy.  If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr64_t bn;
	int size;
	struct proc *p;
{
	struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers.  If another process is already
 * doing the flush, just wait for completion.
 */
int
nfs_vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	struct nfsnode *np = VTONFS(vp);
	int error, sintr, stimeo;

	error = sintr = stimeo = 0;

	if (ISSET(nmp->nm_flag, NFSMNT_INT)) {
		sintr = PCATCH;
		stimeo = 2 * hz;
	}

	/* First wait for any other process doing a flush to complete. */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep(&np->n_flag, PRIBIO|sintr, "nfsvinval", stimeo);
		if (error && sintr && nfs_sigintr(nmp, NULL, p))
			return (EINTR);
	}

	/* Now, flush as required. */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, sintr, 0);
	while (error) {
		if (sintr && nfs_sigintr(nmp, NULL, p)) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup(&np->n_flag);
			}
			return (EINTR);
		}
		error = vinvalbuf(vp, flags, cred, p, 0, stimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O.  Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp)
	struct buf *bp;
{
	if (nfs_numasync == 0)
		goto out;

	if (nfs_bufqlen > nfs_bufqmax)
		goto out;	/* too many bufs in use, force sync */

	if ((bp->b_flags & B_READ) == 0) {
		bp->b_flags |= B_WRITEINPROG;
	}

	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
	nfs_bufqlen++;

	wakeup_one(&nfs_bufq);
	return (0);

out:
	nfsstats.forcedsync++;
	return (EIO);
}
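/*
 * Note on the handoff above (a descriptive sketch; the consumer side
 * lives elsewhere, in the nfsiod code): nfs_asyncio() only enqueues the
 * buffer on nfs_bufq and wakes one sleeping nfsiod, which dequeues it
 * and calls nfs_doio() below.  When no nfsiods are running
 * (nfs_numasync == 0) or the queue is over nfs_bufqmax, the EIO return
 * tells the caller to fall back to synchronous I/O; the read-ahead path
 * in nfs_bioread(), for example, then marks the buffer B_INVAL and
 * releases it.
 */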
/*
 * Do an I/O operation to/from a cache block.  This may be called
 * synchronously or from an nfsiod.
 */
int
nfs_doio(bp, p)
	struct buf *bp;
	struct proc *p;
{
	struct uio *uiop;
	struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int s, error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 * ...though reading /dev/drum still gets us here.
	 */
	if (bp->b_flags & B_PHYS) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		/* mapping was done by vmapbuf() */
		io.iov_base = bp->b_data;
		uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
		if (bp->b_flags & B_READ) {
			uiop->uio_rw = UIO_READ;
			nfsstats.read_physios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readrpc(vp, uiop);
		} else {
			iomode = NFSV3WRITE_DATASYNC;
			uiop->uio_rw = UIO_WRITE;
			nfsstats.write_physios++;
			bcstats.pendingwrites++;
			bcstats.numwrites++;
			error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else if (bp->b_flags & B_READ) {
		io.iov_len = uiop->uio_resid = bp->b_bcount;
		io.iov_base = bp->b_data;
		uiop->uio_rw = UIO_READ;
		switch (vp->v_type) {
		case VREG:
			uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
			nfsstats.read_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readrpc(vp, uiop);
			if (!error) {
				bp->b_validoff = 0;
				if (uiop->uio_resid) {
					/*
					 * If len > 0, there is a hole in the
					 * file and no writes after the hole
					 * have been pushed to the server yet.
					 * Just zero fill the rest of the
					 * valid area.
					 */
					diff = bp->b_bcount - uiop->uio_resid;
					len = np->n_size -
					    ((((off_t)bp->b_blkno) << DEV_BSHIFT)
					    + diff);
					if (len > 0) {
						len = min(len, uiop->uio_resid);
						bzero((char *)bp->b_data + diff,
						    len);
						bp->b_validend = diff + len;
					} else
						bp->b_validend = diff;
				} else
					bp->b_validend = bp->b_bcount;
			}
			if (p && (vp->v_flag & VTEXT) &&
			    (np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
				uprintf("Process killed due to text file modification\n");
				psignal(p, SIGKILL);
			}
			break;
		case VLNK:
			uiop->uio_offset = (off_t)0;
			nfsstats.readlink_bios++;
			bcstats.pendingreads++;
			bcstats.numreads++;
			error = nfs_readlinkrpc(vp, uiop, curproc->p_ucred);
			break;
		default:
			printf("nfs_doio: type %x unexpected\n", vp->v_type);
			break;
		}
		if (error) {
			bp->b_flags |= B_ERROR;
			bp->b_error = error;
		}
	} else {
		io.iov_len = uiop->uio_resid = bp->b_dirtyend
		    - bp->b_dirtyoff;
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		    + bp->b_dirtyoff;
		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_bios++;
		bcstats.pendingwrites++;
		bcstats.numwrites++;
		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE))
		    == B_ASYNC)
			iomode = NFSV3WRITE_UNSTABLE;
		else
			iomode = NFSV3WRITE_FILESYNC;
		bp->b_flags |= B_WRITEINPROG;
		error = nfs_writerpc(vp, uiop, &iomode, &must_commit);

		rw_enter_write(&np->n_commitlock);
		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
			bp->b_flags |= B_NEEDCOMMIT;
			nfs_add_tobecommitted_range(vp, bp);
		} else {
			bp->b_flags &= ~B_NEEDCOMMIT;
			nfs_del_committed_range(vp, bp);
		}
		rw_exit_write(&np->n_commitlock);

		bp->b_flags &= ~B_WRITEINPROG;

		/*
		 * For an interrupted write, the buffer is still valid and the
		 * write hasn't been pushed to the server yet, so we can't set
		 * B_ERROR and report the interruption by setting B_EINTR.  For
		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
		 * is essentially a noop.
		 * For the case of a V3 write rpc not being committed to
		 * stable storage, the block is still dirty and requires
		 * either a commit rpc or another write rpc with
		 * iomode == NFSV3WRITE_FILESYNC before the block is reused.
		 * This is indicated by setting the B_DELWRI and B_NEEDCOMMIT
		 * flags.
		 */
		if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
			s = splbio();
			buf_dirty(bp);
			splx(s);

			if (!(bp->b_flags & B_ASYNC) && error)
				bp->b_flags |= B_EINTR;
		} else {
			if (error) {
				bp->b_flags |= B_ERROR;
				bp->b_error = np->n_error = error;
				np->n_flag |= NWRITEERR;
			}
			bp->b_dirtyoff = bp->b_dirtyend = 0;
		}
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	s = splbio();
	biodone(bp);
	splx(s);
	return (error);
}
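/*
 * Illustrative summary of the V3 commit bookkeeping used above (a
 * descriptive sketch; the commit rpc itself is issued elsewhere): an
 * async write goes out with iomode == NFSV3WRITE_UNSTABLE and the
 * buffer is marked B_NEEDCOMMIT and added to the to-be-committed range;
 * a later modification of the block in nfs_write() strips B_NEEDCOMMIT
 * again, since the data must be rewritten and not merely committed; and
 * when nfs_writerpc() reports a changed write verifier (must_commit),
 * nfs_clearcommit() discards all commit state for the mount because the
 * server may have lost the uncommitted data.
 */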