1 /* $NetBSD: nfs_bio.c,v 1.29 1996/10/13 01:39:04 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 39 */ 40 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/resourcevar.h> 45 #include <sys/signalvar.h> 46 #include <sys/proc.h> 47 #include <sys/buf.h> 48 #include <sys/vnode.h> 49 #include <sys/trace.h> 50 #include <sys/mount.h> 51 #include <sys/kernel.h> 52 #include <sys/namei.h> 53 54 #include <vm/vm.h> 55 56 #include <nfs/rpcv2.h> 57 #include <nfs/nfsproto.h> 58 #include <nfs/nfs.h> 59 #include <nfs/nfsmount.h> 60 #include <nfs/nqnfs.h> 61 #include <nfs/nfsnode.h> 62 #include <nfs/nfs_var.h> 63 64 extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; 65 extern int nfs_numasync; 66 extern struct nfsstats nfsstats; 67 68 /* 69 * Vnode op for read using bio 70 * Any similarity to readip() is purely coincidental 71 */ 72 int 73 nfs_bioread(vp, uio, ioflag, cred) 74 register struct vnode *vp; 75 register struct uio *uio; 76 int ioflag; 77 struct ucred *cred; 78 { 79 register struct nfsnode *np = VTONFS(vp); 80 register int biosize, diff, i; 81 struct buf *bp = NULL, *rabp; 82 struct vattr vattr; 83 struct proc *p; 84 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 85 daddr_t lbn, bn, rabn; 86 caddr_t baddr; 87 int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin; 88 89 #ifdef DIAGNOSTIC 90 if (uio->uio_rw != UIO_READ) 91 panic("nfs_read mode"); 92 #endif 93 if (uio->uio_resid == 0) 94 return (0); 95 if (uio->uio_offset < 0) 96 return (EINVAL); 97 p = uio->uio_procp; 98 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) 99 (void)nfs_fsinfo(nmp, vp, cred, p); 100 biosize = nmp->nm_rsize; 101 /* 102 * For nfs, cache consistency can only be maintained approximately. 103 * Although RFC1094 does not specify the criteria, the following is 104 * believed to be compatible with the reference port. 105 * For nqnfs, full cache consistency is maintained within the loop. 106 * For nfs: 107 * If the file's modify time on the server has changed since the 108 * last read rpc or you have written to the file, 109 * you may have lost data cache consistency with the 110 * server, so flush all of the file's data out of the cache. 111 * Then force a getattr rpc to ensure that you have up to date 112 * attributes. 113 * NB: This implies that cache data can be read when up to 114 * NFS_ATTRTIMEO seconds out of date. If you find that you need current 115 * attributes this could be forced by setting n_attrstamp to 0 before 116 * the VOP_GETATTR() call. 117 */ 118 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 119 if (np->n_flag & NMODIFIED) { 120 if (vp->v_type != VREG) { 121 if (vp->v_type != VDIR) 122 panic("nfs: bioread, not dir"); 123 nfs_invaldir(vp); 124 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 125 if (error) 126 return (error); 127 } 128 np->n_attrstamp = 0; 129 error = VOP_GETATTR(vp, &vattr, cred, p); 130 if (error) 131 return (error); 132 np->n_mtime = vattr.va_mtime.tv_sec; 133 } else { 134 error = VOP_GETATTR(vp, &vattr, cred, p); 135 if (error) 136 return (error); 137 if (np->n_mtime != vattr.va_mtime.tv_sec) { 138 if (vp->v_type == VDIR) 139 nfs_invaldir(vp); 140 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 141 if (error) 142 return (error); 143 np->n_mtime = vattr.va_mtime.tv_sec; 144 } 145 } 146 } 147 do { 148 149 /* 150 * Get a valid lease. If cached data is stale, flush it. 151 */ 152 if (nmp->nm_flag & NFSMNT_NQNFS) { 153 if (NQNFS_CKINVALID(vp, np, ND_READ)) { 154 do { 155 error = nqnfs_getlease(vp, ND_READ, cred, p); 156 } while (error == NQNFS_EXPIRED); 157 if (error) 158 return (error); 159 if (np->n_lrev != np->n_brev || 160 (np->n_flag & NQNFSNONCACHE) || 161 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 162 if (vp->v_type == VDIR) 163 nfs_invaldir(vp); 164 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 165 if (error) 166 return (error); 167 np->n_brev = np->n_lrev; 168 } 169 } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { 170 nfs_invaldir(vp); 171 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 172 if (error) 173 return (error); 174 } 175 } 176 /* 177 * Don't cache symlinks. 178 */ 179 if (np->n_flag & NQNFSNONCACHE 180 || ((vp->v_flag & VROOT) && vp->v_type == VLNK)) { 181 switch (vp->v_type) { 182 case VREG: 183 return (nfs_readrpc(vp, uio, cred)); 184 case VLNK: 185 return (nfs_readlinkrpc(vp, uio, cred)); 186 case VDIR: 187 break; 188 default: 189 printf(" NQNFSNONCACHE: type %x unexpected\n", 190 vp->v_type); 191 }; 192 } 193 baddr = (caddr_t)0; 194 switch (vp->v_type) { 195 case VREG: 196 nfsstats.biocache_reads++; 197 lbn = uio->uio_offset / biosize; 198 on = uio->uio_offset & (biosize - 1); 199 bn = lbn * (biosize / DEV_BSIZE); 200 not_readin = 1; 201 202 /* 203 * Start the read ahead(s), as required. 204 */ 205 if (nfs_numasync > 0 && nmp->nm_readahead > 0) { 206 for (nra = 0; nra < nmp->nm_readahead && 207 (lbn + 1 + nra) * biosize < np->n_size; nra++) { 208 rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE); 209 if (!incore(vp, rabn)) { 210 rabp = nfs_getcacheblk(vp, rabn, biosize, p); 211 if (!rabp) 212 return (EINTR); 213 if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) { 214 rabp->b_flags |= (B_READ | B_ASYNC); 215 if (nfs_asyncio(rabp, cred)) { 216 rabp->b_flags |= B_INVAL; 217 brelse(rabp); 218 } 219 } else 220 brelse(rabp); 221 } 222 } 223 } 224 225 /* 226 * If the block is in the cache and has the required data 227 * in a valid region, just copy it out. 228 * Otherwise, get the block and write back/read in, 229 * as required. 230 */ 231 if ((bp = incore(vp, bn)) && 232 (bp->b_flags & (B_BUSY | B_WRITEINPROG)) == 233 (B_BUSY | B_WRITEINPROG)) 234 got_buf = 0; 235 else { 236 again: 237 bp = nfs_getcacheblk(vp, bn, biosize, p); 238 if (!bp) 239 return (EINTR); 240 got_buf = 1; 241 if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { 242 bp->b_flags |= B_READ; 243 not_readin = 0; 244 error = nfs_doio(bp, cred, p); 245 if (error) { 246 brelse(bp); 247 return (error); 248 } 249 } 250 } 251 n = min((unsigned)(biosize - on), uio->uio_resid); 252 diff = np->n_size - uio->uio_offset; 253 if (diff < n) 254 n = diff; 255 if (not_readin && n > 0) { 256 if (on < bp->b_validoff || (on + n) > bp->b_validend) { 257 if (!got_buf) { 258 bp = nfs_getcacheblk(vp, bn, biosize, p); 259 if (!bp) 260 return (EINTR); 261 got_buf = 1; 262 } 263 bp->b_flags |= B_INVAFTERWRITE; 264 if (bp->b_dirtyend > 0) { 265 if ((bp->b_flags & B_DELWRI) == 0) 266 panic("nfsbioread"); 267 if (VOP_BWRITE(bp) == EINTR) 268 return (EINTR); 269 } else 270 brelse(bp); 271 goto again; 272 } 273 } 274 vp->v_lastr = lbn; 275 diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on); 276 if (diff < n) 277 n = diff; 278 break; 279 case VLNK: 280 nfsstats.biocache_readlinks++; 281 bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); 282 if (!bp) 283 return (EINTR); 284 if ((bp->b_flags & B_DONE) == 0) { 285 bp->b_flags |= B_READ; 286 error = nfs_doio(bp, cred, p); 287 if (error) { 288 brelse(bp); 289 return (error); 290 } 291 } 292 n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 293 got_buf = 1; 294 on = 0; 295 break; 296 case VDIR: 297 if (uio->uio_resid < NFS_READDIRBLKSIZ) 298 return (0); 299 nfsstats.biocache_readdirs++; 300 lbn = uio->uio_offset / NFS_DIRBLKSIZ; 301 on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); 302 bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); 303 if (!bp) 304 return (EINTR); 305 if ((bp->b_flags & B_DONE) == 0) { 306 bp->b_flags |= B_READ; 307 error = nfs_doio(bp, cred, p); 308 if (error) { 309 brelse(bp); 310 while (error == NFSERR_BAD_COOKIE) { 311 nfs_invaldir(vp); 312 error = nfs_vinvalbuf(vp, 0, cred, p, 1); 313 /* 314 * Yuck! The directory has been modified on the 315 * server. The only way to get the block is by 316 * reading from the beginning to get all the 317 * offset cookies. 318 */ 319 for (i = 0; i <= lbn && !error; i++) { 320 bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); 321 if (!bp) 322 return (EINTR); 323 if ((bp->b_flags & B_DONE) == 0) { 324 bp->b_flags |= B_READ; 325 error = nfs_doio(bp, cred, p); 326 if (error) 327 brelse(bp); 328 } 329 } 330 } 331 if (error) 332 return (error); 333 } 334 } 335 336 /* 337 * If not eof and read aheads are enabled, start one. 338 * (You need the current block first, so that you have the 339 * directory offset cookie of the next block.) 340 */ 341 if (nfs_numasync > 0 && nmp->nm_readahead > 0 && 342 (np->n_direofoffset == 0 || 343 (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && 344 !(np->n_flag & NQNFSNONCACHE) && 345 !incore(vp, lbn + 1)) { 346 rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); 347 if (rabp) { 348 if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) { 349 rabp->b_flags |= (B_READ | B_ASYNC); 350 if (nfs_asyncio(rabp, cred)) { 351 rabp->b_flags |= B_INVAL; 352 brelse(rabp); 353 } 354 } else 355 brelse(rabp); 356 } 357 } 358 n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); 359 got_buf = 1; 360 break; 361 default: 362 printf(" nfsbioread: type %x unexpected\n",vp->v_type); 363 break; 364 }; 365 366 if (n > 0) { 367 if (!baddr) 368 baddr = bp->b_data; 369 error = uiomove(baddr + on, (int)n, uio); 370 } 371 switch (vp->v_type) { 372 case VREG: 373 break; 374 case VLNK: 375 n = 0; 376 break; 377 case VDIR: 378 if (np->n_flag & NQNFSNONCACHE) 379 bp->b_flags |= B_INVAL; 380 break; 381 default: 382 printf(" nfsbioread: type %x unexpected\n",vp->v_type); 383 } 384 if (got_buf) 385 brelse(bp); 386 } while (error == 0 && uio->uio_resid > 0 && n > 0); 387 return (error); 388 } 389 390 /* 391 * Vnode op for write using bio 392 */ 393 int 394 nfs_write(v) 395 void *v; 396 { 397 struct vop_write_args /* { 398 struct vnode *a_vp; 399 struct uio *a_uio; 400 int a_ioflag; 401 struct ucred *a_cred; 402 } */ *ap = v; 403 register int biosize; 404 register struct uio *uio = ap->a_uio; 405 struct proc *p = uio->uio_procp; 406 register struct vnode *vp = ap->a_vp; 407 struct nfsnode *np = VTONFS(vp); 408 register struct ucred *cred = ap->a_cred; 409 int ioflag = ap->a_ioflag; 410 struct buf *bp; 411 struct vattr vattr; 412 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 413 daddr_t lbn, bn; 414 int n, on, error = 0, iomode, must_commit; 415 416 #ifdef DIAGNOSTIC 417 if (uio->uio_rw != UIO_WRITE) 418 panic("nfs_write mode"); 419 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 420 panic("nfs_write proc"); 421 #endif 422 if (vp->v_type != VREG) 423 return (EIO); 424 if (np->n_flag & NWRITEERR) { 425 np->n_flag &= ~NWRITEERR; 426 return (np->n_error); 427 } 428 if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) 429 (void)nfs_fsinfo(nmp, vp, cred, p); 430 if (ioflag & (IO_APPEND | IO_SYNC)) { 431 if (np->n_flag & NMODIFIED) { 432 np->n_attrstamp = 0; 433 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 434 if (error) 435 return (error); 436 } 437 if (ioflag & IO_APPEND) { 438 np->n_attrstamp = 0; 439 error = VOP_GETATTR(vp, &vattr, cred, p); 440 if (error) 441 return (error); 442 uio->uio_offset = np->n_size; 443 } 444 } 445 if (uio->uio_offset < 0) 446 return (EINVAL); 447 if (uio->uio_resid == 0) 448 return (0); 449 /* 450 * Maybe this should be above the vnode op call, but so long as 451 * file servers have no limits, i don't think it matters 452 */ 453 if (p && uio->uio_offset + uio->uio_resid > 454 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 455 psignal(p, SIGXFSZ); 456 return (EFBIG); 457 } 458 /* 459 * I use nm_rsize, not nm_wsize so that all buffer cache blocks 460 * will be the same size within a filesystem. nfs_writerpc will 461 * still use nm_wsize when sizing the rpc's. 462 */ 463 biosize = nmp->nm_rsize; 464 do { 465 466 /* 467 * XXX make sure we aren't cached in the VM page cache 468 */ 469 (void)vnode_pager_uncache(vp); 470 471 /* 472 * Check for a valid write lease. 473 */ 474 if ((nmp->nm_flag & NFSMNT_NQNFS) && 475 NQNFS_CKINVALID(vp, np, ND_WRITE)) { 476 do { 477 error = nqnfs_getlease(vp, ND_WRITE, cred, p); 478 } while (error == NQNFS_EXPIRED); 479 if (error) 480 return (error); 481 if (np->n_lrev != np->n_brev || 482 (np->n_flag & NQNFSNONCACHE)) { 483 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 484 if (error) 485 return (error); 486 np->n_brev = np->n_lrev; 487 } 488 } 489 if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { 490 iomode = NFSV3WRITE_FILESYNC; 491 error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); 492 if (must_commit) 493 nfs_clearcommit(vp->v_mount); 494 return (error); 495 } 496 nfsstats.biocache_writes++; 497 lbn = uio->uio_offset / biosize; 498 on = uio->uio_offset & (biosize-1); 499 n = min((unsigned)(biosize - on), uio->uio_resid); 500 bn = lbn * (biosize / DEV_BSIZE); 501 again: 502 bp = nfs_getcacheblk(vp, bn, biosize, p); 503 if (!bp) 504 return (EINTR); 505 if (bp->b_wcred == NOCRED) { 506 crhold(cred); 507 bp->b_wcred = cred; 508 } 509 np->n_flag |= NMODIFIED; 510 if (uio->uio_offset + n > np->n_size) { 511 np->n_size = uio->uio_offset + n; 512 vnode_pager_setsize(vp, (u_long)np->n_size); 513 } 514 515 /* 516 * If the new write will leave a contiguous dirty 517 * area, just update the b_dirtyoff and b_dirtyend, 518 * otherwise force a write rpc of the old dirty area. 519 */ 520 if (bp->b_dirtyend > 0 && 521 (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 522 bp->b_proc = p; 523 if (VOP_BWRITE(bp) == EINTR) 524 return (EINTR); 525 goto again; 526 } 527 528 /* 529 * Check for valid write lease and get one as required. 530 * In case getblk() and/or bwrite() delayed us. 531 */ 532 if ((nmp->nm_flag & NFSMNT_NQNFS) && 533 NQNFS_CKINVALID(vp, np, ND_WRITE)) { 534 do { 535 error = nqnfs_getlease(vp, ND_WRITE, cred, p); 536 } while (error == NQNFS_EXPIRED); 537 if (error) { 538 brelse(bp); 539 return (error); 540 } 541 if (np->n_lrev != np->n_brev || 542 (np->n_flag & NQNFSNONCACHE)) { 543 brelse(bp); 544 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 545 if (error) 546 return (error); 547 np->n_brev = np->n_lrev; 548 goto again; 549 } 550 } 551 error = uiomove((char *)bp->b_data + on, n, uio); 552 if (error) { 553 bp->b_flags |= B_ERROR; 554 brelse(bp); 555 return (error); 556 } 557 if (bp->b_dirtyend > 0) { 558 bp->b_dirtyoff = min(on, bp->b_dirtyoff); 559 bp->b_dirtyend = max((on + n), bp->b_dirtyend); 560 } else { 561 bp->b_dirtyoff = on; 562 bp->b_dirtyend = on + n; 563 } 564 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 565 bp->b_validoff > bp->b_dirtyend) { 566 bp->b_validoff = bp->b_dirtyoff; 567 bp->b_validend = bp->b_dirtyend; 568 } else { 569 bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); 570 bp->b_validend = max(bp->b_validend, bp->b_dirtyend); 571 } 572 573 /* 574 * Since this block is being modified, it must be written 575 * again and not just committed. 576 */ 577 bp->b_flags &= ~B_NEEDCOMMIT; 578 579 /* 580 * If the lease is non-cachable or IO_SYNC do bwrite(). 581 */ 582 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 583 bp->b_proc = p; 584 error = VOP_BWRITE(bp); 585 if (error) 586 return (error); 587 if (np->n_flag & NQNFSNONCACHE) { 588 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); 589 if (error) 590 return (error); 591 } 592 } else if ((n + on) == biosize && 593 (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 594 bp->b_proc = (struct proc *)0; 595 bp->b_flags |= B_ASYNC; 596 (void)nfs_writebp(bp, 0); 597 } else { 598 bdwrite(bp); 599 } 600 } while (uio->uio_resid > 0 && n > 0); 601 return (0); 602 } 603 604 /* 605 * Get an nfs cache block. 606 * Allocate a new one if the block isn't currently in the cache 607 * and return the block marked busy. If the calling process is 608 * interrupted by a signal for an interruptible mount point, return 609 * NULL. 610 */ 611 struct buf * 612 nfs_getcacheblk(vp, bn, size, p) 613 struct vnode *vp; 614 daddr_t bn; 615 int size; 616 struct proc *p; 617 { 618 register struct buf *bp; 619 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 620 621 if (nmp->nm_flag & NFSMNT_INT) { 622 bp = getblk(vp, bn, size, PCATCH, 0); 623 while (bp == (struct buf *)0) { 624 if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) 625 return ((struct buf *)0); 626 bp = getblk(vp, bn, size, 0, 2 * hz); 627 } 628 } else 629 bp = getblk(vp, bn, size, 0, 0); 630 return (bp); 631 } 632 633 /* 634 * Flush and invalidate all dirty buffers. If another process is already 635 * doing the flush, just wait for completion. 636 */ 637 int 638 nfs_vinvalbuf(vp, flags, cred, p, intrflg) 639 struct vnode *vp; 640 int flags; 641 struct ucred *cred; 642 struct proc *p; 643 int intrflg; 644 { 645 register struct nfsnode *np = VTONFS(vp); 646 struct nfsmount *nmp = VFSTONFS(vp->v_mount); 647 int error = 0, slpflag, slptimeo; 648 649 if ((nmp->nm_flag & NFSMNT_INT) == 0) 650 intrflg = 0; 651 if (intrflg) { 652 slpflag = PCATCH; 653 slptimeo = 2 * hz; 654 } else { 655 slpflag = 0; 656 slptimeo = 0; 657 } 658 /* 659 * First wait for any other process doing a flush to complete. 660 */ 661 while (np->n_flag & NFLUSHINPROG) { 662 np->n_flag |= NFLUSHWANT; 663 error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", 664 slptimeo); 665 if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) 666 return (EINTR); 667 } 668 669 /* 670 * Now, flush as required. 671 */ 672 np->n_flag |= NFLUSHINPROG; 673 error = vinvalbuf(vp, flags, cred, p, slpflag, 0); 674 while (error) { 675 if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { 676 np->n_flag &= ~NFLUSHINPROG; 677 if (np->n_flag & NFLUSHWANT) { 678 np->n_flag &= ~NFLUSHWANT; 679 wakeup((caddr_t)&np->n_flag); 680 } 681 return (EINTR); 682 } 683 error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); 684 } 685 np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); 686 if (np->n_flag & NFLUSHWANT) { 687 np->n_flag &= ~NFLUSHWANT; 688 wakeup((caddr_t)&np->n_flag); 689 } 690 return (0); 691 } 692 693 /* 694 * Initiate asynchronous I/O. Return an error if no nfsiods are available. 695 * This is mainly to avoid queueing async I/O requests when the nfsiods 696 * are all hung on a dead server. 697 */ 698 int 699 nfs_asyncio(bp, cred) 700 register struct buf *bp; 701 struct ucred *cred; 702 { 703 register int i; 704 705 if (nfs_numasync == 0) 706 return (EIO); 707 for (i = 0; i < NFS_MAXASYNCDAEMON; i++) 708 if (nfs_iodwant[i]) { 709 if (bp->b_flags & B_READ) { 710 if (bp->b_rcred == NOCRED && cred != NOCRED) { 711 crhold(cred); 712 bp->b_rcred = cred; 713 } 714 } else { 715 bp->b_flags |= B_WRITEINPROG; 716 if (bp->b_wcred == NOCRED && cred != NOCRED) { 717 crhold(cred); 718 bp->b_wcred = cred; 719 } 720 } 721 722 TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist); 723 nfs_iodwant[i] = (struct proc *)0; 724 wakeup((caddr_t)&nfs_iodwant[i]); 725 return (0); 726 } 727 728 /* 729 * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE 730 * return EIO so the process will call nfs_doio() and do it 731 * synchronously. 732 */ 733 if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE)) 734 return (EIO); 735 736 /* 737 * Just turn the async write into a delayed write, instead of 738 * doing in synchronously. Hopefully, at least one of the nfsiods 739 * is currently doing a write for this file and will pick up the 740 * delayed writes before going back to sleep. 741 */ 742 bp->b_flags |= B_DELWRI; 743 reassignbuf(bp, bp->b_vp); 744 biodone(bp); 745 return (0); 746 } 747 748 /* 749 * Do an I/O operation to/from a cache block. This may be called 750 * synchronously or from an nfsiod. 751 */ 752 int 753 nfs_doio(bp, cr, p) 754 register struct buf *bp; 755 struct ucred *cr; 756 struct proc *p; 757 { 758 register struct uio *uiop; 759 register struct vnode *vp; 760 struct nfsnode *np; 761 struct nfsmount *nmp; 762 int error = 0, diff, len, iomode, must_commit = 0; 763 struct uio uio; 764 struct iovec io; 765 766 vp = bp->b_vp; 767 np = VTONFS(vp); 768 nmp = VFSTONFS(vp->v_mount); 769 uiop = &uio; 770 uiop->uio_iov = &io; 771 uiop->uio_iovcnt = 1; 772 uiop->uio_segflg = UIO_SYSSPACE; 773 uiop->uio_procp = p; 774 775 /* 776 * Historically, paging was done with physio, but no more... 777 */ 778 if (bp->b_flags & B_PHYS) { 779 /* 780 * ...though reading /dev/drum still gets us here. 781 */ 782 io.iov_len = uiop->uio_resid = bp->b_bcount; 783 /* mapping was done by vmapbuf() */ 784 io.iov_base = bp->b_data; 785 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; 786 if (bp->b_flags & B_READ) { 787 uiop->uio_rw = UIO_READ; 788 nfsstats.read_physios++; 789 error = nfs_readrpc(vp, uiop, cr); 790 } else { 791 iomode = NFSV3WRITE_DATASYNC; 792 uiop->uio_rw = UIO_WRITE; 793 nfsstats.write_physios++; 794 error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); 795 } 796 if (error) { 797 bp->b_flags |= B_ERROR; 798 bp->b_error = error; 799 } 800 } else if (bp->b_flags & B_READ) { 801 io.iov_len = uiop->uio_resid = bp->b_bcount; 802 io.iov_base = bp->b_data; 803 uiop->uio_rw = UIO_READ; 804 switch (vp->v_type) { 805 case VREG: 806 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; 807 nfsstats.read_bios++; 808 error = nfs_readrpc(vp, uiop, cr); 809 if (!error) { 810 bp->b_validoff = 0; 811 if (uiop->uio_resid) { 812 /* 813 * If len > 0, there is a hole in the file and 814 * no writes after the hole have been pushed to 815 * the server yet. 816 * Just zero fill the rest of the valid area. 817 */ 818 diff = bp->b_bcount - uiop->uio_resid; 819 len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE 820 + diff); 821 if (len > 0) { 822 len = min(len, uiop->uio_resid); 823 bzero((char *)bp->b_data + diff, len); 824 bp->b_validend = diff + len; 825 } else 826 bp->b_validend = diff; 827 } else 828 bp->b_validend = bp->b_bcount; 829 } 830 if (p && (vp->v_flag & VTEXT) && 831 (((nmp->nm_flag & NFSMNT_NQNFS) && 832 NQNFS_CKINVALID(vp, np, ND_READ) && 833 np->n_lrev != np->n_brev) || 834 (!(nmp->nm_flag & NFSMNT_NQNFS) && 835 np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { 836 uprintf("Process killed due to text file modification\n"); 837 psignal(p, SIGKILL); 838 p->p_holdcnt++; 839 } 840 break; 841 case VLNK: 842 uiop->uio_offset = (off_t)0; 843 nfsstats.readlink_bios++; 844 error = nfs_readlinkrpc(vp, uiop, cr); 845 break; 846 case VDIR: 847 nfsstats.readdir_bios++; 848 uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; 849 if (nmp->nm_flag & NFSMNT_RDIRPLUS) { 850 error = nfs_readdirplusrpc(vp, uiop, cr); 851 if (error == NFSERR_NOTSUPP) 852 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; 853 } 854 if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) 855 error = nfs_readdirrpc(vp, uiop, cr); 856 break; 857 default: 858 printf("nfs_doio: type %x unexpected\n",vp->v_type); 859 break; 860 }; 861 if (error) { 862 bp->b_flags |= B_ERROR; 863 bp->b_error = error; 864 } 865 } else { 866 io.iov_len = uiop->uio_resid = bp->b_dirtyend 867 - bp->b_dirtyoff; 868 uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE 869 + bp->b_dirtyoff; 870 io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; 871 uiop->uio_rw = UIO_WRITE; 872 nfsstats.write_bios++; 873 if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC) 874 iomode = NFSV3WRITE_UNSTABLE; 875 else 876 iomode = NFSV3WRITE_FILESYNC; 877 bp->b_flags |= B_WRITEINPROG; 878 #ifdef fvdl_debug 879 printf("nfs_doio(%x): bp %x doff %d dend %d\n", 880 vp, bp, bp->b_dirtyoff, bp->b_dirtyend); 881 #endif 882 error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); 883 if (!error && iomode == NFSV3WRITE_UNSTABLE) 884 bp->b_flags |= B_NEEDCOMMIT; 885 else 886 bp->b_flags &= ~B_NEEDCOMMIT; 887 bp->b_flags &= ~B_WRITEINPROG; 888 889 /* 890 * For an interrupted write, the buffer is still valid and the 891 * write hasn't been pushed to the server yet, so we can't set 892 * B_ERROR and report the interruption by setting B_EINTR. For 893 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt 894 * is essentially a noop. 895 * For the case of a V3 write rpc not being committed to stable 896 * storage, the block is still dirty and requires either a commit 897 * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC 898 * before the block is reused. This is indicated by setting the 899 * B_DELWRI and B_NEEDCOMMIT flags. 900 */ 901 if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { 902 bp->b_flags |= B_DELWRI; 903 904 /* 905 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the 906 * buffer to the clean list, we have to reassign it back to the 907 * dirty one. Ugh. 908 */ 909 if (bp->b_flags & B_ASYNC) 910 reassignbuf(bp, vp); 911 else if (error) 912 bp->b_flags |= B_EINTR; 913 } else { 914 if (error) { 915 bp->b_flags |= B_ERROR; 916 bp->b_error = np->n_error = error; 917 np->n_flag |= NWRITEERR; 918 } 919 bp->b_dirtyoff = bp->b_dirtyend = 0; 920 } 921 } 922 bp->b_resid = uiop->uio_resid; 923 if (must_commit) 924 nfs_clearcommit(vp->v_mount); 925 biodone(bp); 926 return (error); 927 } 928