1 /* $NetBSD: nfs_bio.c,v 1.31 1997/04/20 16:24:44 fvdl Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Rick Macklem at The University of Guelph. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>

#include <vm/vm.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_var.h>

extern int nfs_numasync;		/* number of running nfsiod daemons */
extern struct nfsstats nfsstats;

/*
 * Vnode op for read using bio.
 * Any similarity to readip() is purely coincidental.
 *
 * Reads through the buffer cache for VREG, VLNK and VDIR vnodes;
 * non-cachable (NQNFSNONCACHE) regular files and symlinks bypass the
 * cache and go straight to the read RPCs.  Returns 0 or an errno;
 * EINTR is returned when nfs_getcacheblk() is interrupted by a signal
 * on an interruptible mount.  `ioflag' is accepted for the vnode-op
 * interface but not consulted here.
 */
int
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize, diff, i;
	struct buf *bp = NULL, *rabp;
	struct vattr vattr;
	struct proc *p;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn, rabn;
	caddr_t baddr;
	int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);
	p = uio->uio_procp;
	/* For a V3 mount without cached fsinfo yet, fetch it now. */
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
	 * attributes this could be forced by setting n_attrstamp to 0 before
	 * the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			if (vp->v_type != VREG) {
				if (vp->v_type != VDIR)
					panic("nfs: bioread, not dir");
				nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
			/* Force a fresh getattr so n_mtime is current. */
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			np->n_mtime = vattr.va_mtime.tv_sec;
		} else {
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			if (np->n_mtime != vattr.va_mtime.tv_sec) {
				/* Server copy changed; toss cached data. */
				if (vp->v_type == VDIR)
					nfs_invaldir(vp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_mtime = vattr.va_mtime.tv_sec;
			}
		}
	}
	do {

	    /*
	     * Get a valid lease. If cached data is stale, flush it.
	     */
	    if (nmp->nm_flag & NFSMNT_NQNFS) {
		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
		    do {
			error = nqnfs_getlease(vp, ND_READ, cred, p);
		    } while (error == NQNFS_EXPIRED);
		    if (error)
			return (error);
		    /*
		     * Lease modrev advanced past what we cached
		     * (n_lrev != n_brev), or caching is disabled, or a
		     * locally modified directory: invalidate.
		     */
		    if (np->n_lrev != np->n_brev ||
			(np->n_flag & NQNFSNONCACHE) ||
			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
			if (vp->v_type == VDIR)
			    nfs_invaldir(vp);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
			    return (error);
			np->n_brev = np->n_lrev;
		    }
		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
		    nfs_invaldir(vp);
		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
		    if (error)
			return (error);
		}
	    }
	    /*
	     * Don't cache symlinks.
	     */
	    if (np->n_flag & NQNFSNONCACHE
		|| ((vp->v_flag & VROOT) && vp->v_type == VLNK)) {
		switch (vp->v_type) {
		case VREG:
			return (nfs_readrpc(vp, uio, cred));
		case VLNK:
			return (nfs_readlinkrpc(vp, uio, cred));
		case VDIR:
			break;
		default:
			printf(" NQNFSNONCACHE: type %x unexpected\n",
			    vp->v_type);
		};
	    }
	    baddr = (caddr_t)0;
	    switch (vp->v_type) {
	    case VREG:
		nfsstats.biocache_reads++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);
		/* Buffer-cache block addresses are in DEV_BSIZE units. */
		bn = lbn * (biosize / DEV_BSIZE);
		not_readin = 1;

		/*
		 * Start the read ahead(s), as required.
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
		    for (nra = 0; nra < nmp->nm_readahead &&
			(lbn + 1 + nra) * biosize < np->n_size; nra++) {
			rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
			if (!incore(vp, rabn)) {
			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
			    if (!rabp)
				return (EINTR);
			    if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				if (nfs_asyncio(rabp, cred)) {
				    /* No iod available; drop the block. */
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    } else
				brelse(rabp);
			}
		    }
		}

		/*
		 * If the block is in the cache and has the required data
		 * in a valid region, just copy it out.
		 * Otherwise, get the block and write back/read in,
		 * as required.
		 * got_buf == 0 means we are borrowing a buffer that a
		 * write RPC currently holds busy (B_BUSY|B_WRITEINPROG)
		 * rather than acquiring it ourselves via getblk.
		 */
		if ((bp = incore(vp, bn)) &&
		    (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
		    (B_BUSY | B_WRITEINPROG))
			got_buf = 0;
		else {
again:
			bp = nfs_getcacheblk(vp, bn, biosize, p);
			if (!bp)
				return (EINTR);
			got_buf = 1;
			if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				bp->b_flags |= B_READ;
				not_readin = 0;
				error = nfs_doio(bp, cred, p);
				if (error) {
				    brelse(bp);
				    return (error);
				}
			}
		}
		n = min((unsigned)(biosize - on), uio->uio_resid);
		/* Clamp to end of file. */
		diff = np->n_size - uio->uio_offset;
		if (diff < n)
			n = diff;
		if (not_readin && n > 0) {
			/*
			 * We did not issue a read for this block, so the
			 * cached valid region may not cover the request.
			 * If so, push any dirty data and re-fetch.
			 */
			if (on < bp->b_validoff || (on + n) > bp->b_validend) {
				if (!got_buf) {
				    bp = nfs_getcacheblk(vp, bn, biosize, p);
				    if (!bp)
					return (EINTR);
				    got_buf = 1;
				}
				bp->b_flags |= B_INVAFTERWRITE;
				if (bp->b_dirtyend > 0) {
				    if ((bp->b_flags & B_DELWRI) == 0)
					panic("nfsbioread");
				    if (VOP_BWRITE(bp) == EINTR)
					return (EINTR);
				} else
				    brelse(bp);
				goto again;
			}
		}
		vp->v_lastr = lbn;
		/* Further clamp n to the valid region of the buffer. */
		diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
		if (diff < n)
			n = diff;
		break;
	    case VLNK:
		nfsstats.biocache_readlinks++;
		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
			bp->b_flags |= B_READ;
			error = nfs_doio(bp, cred, p);
			if (error) {
				brelse(bp);
				return (error);
			}
		}
		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
		got_buf = 1;
		on = 0;
		break;
	    case VDIR:
		/* Caller must provide room for at least one full block. */
		if (uio->uio_resid < NFS_READDIRBLKSIZ)
			return (0);
		nfsstats.biocache_readdirs++;
		lbn = uio->uio_offset / NFS_DIRBLKSIZ;
		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
		if (!bp)
			return (EINTR);
		if ((bp->b_flags & B_DONE) == 0) {
		    bp->b_flags |= B_READ;
		    error = nfs_doio(bp, cred, p);
		    if (error) {
			brelse(bp);
			while (error == NFSERR_BAD_COOKIE) {
			    nfs_invaldir(vp);
			    error = nfs_vinvalbuf(vp, 0, cred, p, 1);
			    /*
			     * Yuck! The directory has been modified on the
			     * server. The only way to get the block is by
			     * reading from the beginning to get all the
			     * offset cookies.
			     */
			    for (i = 0; i <= lbn && !error; i++) {
				bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
				if (!bp)
				    return (EINTR);
				if ((bp->b_flags & B_DONE) == 0) {
				    bp->b_flags |= B_READ;
				    error = nfs_doio(bp, cred, p);
				    if (error)
					brelse(bp);
				}
			    }
			}
			if (error)
			    return (error);
		    }
		}

		/*
		 * If not eof and read aheads are enabled, start one.
		 * (You need the current block first, so that you have the
		 *  directory offset cookie of the next block.)
		 */
		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
		    (np->n_direofoffset == 0 ||
		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
		    !(np->n_flag & NQNFSNONCACHE) &&
		    !incore(vp, lbn + 1)) {
			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
			if (rabp) {
			    if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
				rabp->b_flags |= (B_READ | B_ASYNC);
				if (nfs_asyncio(rabp, cred)) {
				    rabp->b_flags |= B_INVAL;
				    brelse(rabp);
				}
			    } else
				brelse(rabp);
			}
		}
		n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
		got_buf = 1;
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
		break;
	    };

	    if (n > 0) {
		if (!baddr)
			baddr = bp->b_data;
		error = uiomove(baddr + on, (int)n, uio);
	    }
	    switch (vp->v_type) {
	    case VREG:
		break;
	    case VLNK:
		/* Symlinks are read in one shot; force loop exit. */
		n = 0;
		break;
	    case VDIR:
		if (np->n_flag & NQNFSNONCACHE)
			bp->b_flags |= B_INVAL;
		break;
	    default:
		printf(" nfsbioread: type %x unexpected\n",vp->v_type);
	    }
	    if (got_buf)
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n > 0);
	return (error);
}

/*
 * Vnode op for write using bio.
 *
 * Copies user data into cache buffers, maintaining the buffer's
 * dirty (b_dirtyoff/b_dirtyend) and valid (b_validoff/b_validend)
 * regions, and writes them out synchronously, asynchronously, or as
 * delayed writes depending on IO_SYNC, block alignment, and the NQNFS
 * caching state.  Returns 0 or an errno.
 */
int
nfs_write(v)
	void *v;
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	daddr_t lbn, bn;
	int n, on, error = 0, iomode, must_commit;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	/* Report any deferred write error from an earlier async write. */
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3)
		(void)nfs_fsinfo(nmp, vp, cred, p);
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_attrstamp = 0;
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error)
				return (error);
		}
		if (ioflag & IO_APPEND) {
			/* Get the current server size to append at. */
			np->n_attrstamp = 0;
			error = VOP_GETATTR(vp, &vattr, cred, p);
			if (error)
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, i don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	do {

		/*
		 * XXX make sure we aren't cached in the VM page cache
		 */
		(void)vnode_pager_uncache(vp);

		/*
		 * Check for a valid write lease.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
			}
		}
		/*
		 * Non-cachable single-iovec writes go straight to a
		 * synchronous write RPC, bypassing the cache.
		 */
		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
		    iomode = NFSV3WRITE_FILESYNC;
		    error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
		    if (must_commit)
			nfs_clearcommit(vp->v_mount);
		    return (error);
		}
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		/* Buffer-cache block addresses are in DEV_BSIZE units. */
		bn = lbn * (biosize / DEV_BSIZE);
again:
		bp = nfs_getcacheblk(vp, bn, biosize, p);
		if (!bp)
			return (EINTR);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}
		np->n_flag |= NMODIFIED;
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (VOP_BWRITE(bp) == EINTR)
				return (EINTR);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
			do {
				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		error = uiomove((char *)bp->b_data + on, n, uio);
		if (error) {
			bp->b_flags |= B_ERROR;
			brelse(bp);
			return (error);
		}
		/* Extend (or establish) the dirty region. */
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on + n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on + n;
		}
		/*
		 * Keep the valid region contiguous with the dirty one;
		 * if they are disjoint, the dirty region wins.
		 */
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * Since this block is being modified, it must be written
		 * again and not just committed.
		 */
		bp->b_flags &= ~B_NEEDCOMMIT;

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			error = VOP_BWRITE(bp);
			if (error)
				return (error);
			if (np->n_flag & NQNFSNONCACHE) {
				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
				if (error)
					return (error);
			}
		} else if ((n + on) == biosize &&
			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			/* Whole block filled: push it asynchronously. */
			bp->b_proc = (struct proc *)0;
			bp->b_flags |= B_ASYNC;
			(void)nfs_writebp(bp, 0);
		} else {
			/* Partial block: leave as a delayed write. */
			bdwrite(bp);
		}
	} while (uio->uio_resid > 0 && n > 0);
	return (0);
}

/*
 * Get an nfs cache block.
 * Allocate a new one if the block isn't currently in the cache
 * and return the block marked busy. If the calling process is
 * interrupted by a signal for an interruptible mount point, return
 * NULL.
 */
struct buf *
nfs_getcacheblk(vp, bn, size, p)
	struct vnode *vp;
	daddr_t bn;
	int size;
	struct proc *p;
{
	register struct buf *bp;
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);

	if (nmp->nm_flag & NFSMNT_INT) {
		/* Interruptible mount: sleep with PCATCH, poll for signals. */
		bp = getblk(vp, bn, size, PCATCH, 0);
		while (bp == (struct buf *)0) {
			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
				return ((struct buf *)0);
			bp = getblk(vp, bn, size, 0, 2 * hz);
		}
	} else
		bp = getblk(vp, bn, size, 0, 0);
	return (bp);
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 * The NFLUSHINPROG/NFLUSHWANT flags in np->n_flag serialize flushers.
 * Returns 0, EINTR (signal on an interruptible mount), or an error
 * from vinvalbuf().
 */
int
nfs_vinvalbuf(vp, flags, cred, p, intrflg)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int intrflg;
{
	register struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
	int error = 0, slpflag, slptimeo;

	if ((nmp->nm_flag & NFSMNT_INT) == 0)
		intrflg = 0;
	if (intrflg) {
		slpflag = PCATCH;
		slptimeo = 2 * hz;
	} else {
		slpflag = 0;
		slptimeo = 0;
	}
	/*
	 * First wait for any other process doing a flush to complete.
	 */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
			slptimeo);
		if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
			return (EINTR);
	}

	/*
	 * Now, flush as required.
	 */
	np->n_flag |= NFLUSHINPROG;
	error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
	while (error) {
		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
			/* Interrupted: drop the interlock and wake waiters. */
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup((caddr_t)&np->n_flag);
			}
			return (EINTR);
		}
		/* Retry without PCATCH but with a timeout. */
		error = vinvalbuf(vp, flags, cred, p, 0, slptimeo);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup((caddr_t)&np->n_flag);
	}
	return (0);
}

/*
 * Initiate asynchronous I/O. Return an error if no nfsiods are available.
 * This is mainly to avoid queueing async I/O requests when the nfsiods
 * are all hung on a dead server.
 */
int
nfs_asyncio(bp, cred)
	register struct buf *bp;
	struct ucred *cred;
{
	register int i;
	register struct nfsmount *nmp;
	int gotiod, slpflag = 0, slptimeo = 0, error;

	if (nfs_numasync == 0)
		return (EIO);

	nmp = VFSTONFS(bp->b_vp->v_mount);
again:
	if (nmp->nm_flag & NFSMNT_INT)
		slpflag = PCATCH;
	gotiod = FALSE;

	/*
	 * Find a free iod to process this request.
	 */
	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
		if (nfs_iodwant[i]) {
			/*
			 * Found one, so wake it up and tell it which
			 * mount to process.
			 */
			nfs_iodwant[i] = (struct proc *)0;
			nfs_iodmount[i] = nmp;
			nmp->nm_bufqiods++;
			wakeup((caddr_t)&nfs_iodwant[i]);
			gotiod = TRUE;
			break;
		}
	/*
	 * If none are free, we may already have an iod working on this mount
	 * point. If so, it will process our request.
	 */
	if (!gotiod && nmp->nm_bufqiods > 0)
		gotiod = TRUE;

	/*
	 * If we have an iod which can process the request, then queue
	 * the buffer.
	 */
	if (gotiod) {
		/*
		 * Ensure that the queue never grows too large.
		 */
		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
			nmp->nm_bufqwant = TRUE;
			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
			    "nfsaio", slptimeo);
			if (error) {
				if (nfs_sigintr(nmp, NULL, bp->b_proc))
					return (EINTR);
				if (slpflag == PCATCH) {
					/* Signal pending; back off to a
					 * plain timed sleep. */
					slpflag = 0;
					slptimeo = 2 * hz;
				}
			}
			/*
			 * We might have lost our iod while sleeping,
			 * so check and loop if nescessary.
			 */
			if (nmp->nm_bufqiods == 0)
				goto again;
		}

		if (bp->b_flags & B_READ) {
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
		} else {
			bp->b_flags |= B_WRITEINPROG;
			if (bp->b_wcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_wcred = cred;
			}
		}

		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
		nmp->nm_bufqlen++;
		return (0);
	}

	/*
	 * All the iods are busy on other mounts, so return EIO to
	 * force the caller to process the i/o synchronously.
	 */
	return (EIO);
}

/*
 * Do an I/O operation to/from a cache block. This may be called
 * synchronously or from an nfsiod.
 * Performs the actual read/readlink/readdir/write RPC for the buffer,
 * maintains the buffer's valid/dirty regions and the V3
 * unstable-write/commit state (B_NEEDCOMMIT), and calls biodone().
 * Returns 0 or an errno (also recorded in bp->b_error with B_ERROR set,
 * except for the interrupted-write case described below).
 */
int
nfs_doio(bp, cr, p)
	register struct buf *bp;
	struct ucred *cr;
	struct proc *p;
{
	register struct uio *uiop;
	register struct vnode *vp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	int error = 0, diff, len, iomode, must_commit = 0;
	struct uio uio;
	struct iovec io;

	vp = bp->b_vp;
	np = VTONFS(vp);
	nmp = VFSTONFS(vp->v_mount);
	uiop = &uio;
	uiop->uio_iov = &io;
	uiop->uio_iovcnt = 1;
	uiop->uio_segflg = UIO_SYSSPACE;
	uiop->uio_procp = p;

	/*
	 * Historically, paging was done with physio, but no more...
	 */
	if (bp->b_flags & B_PHYS) {
	    /*
	     * ...though reading /dev/drum still gets us here.
	     */
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    /* mapping was done by vmapbuf() */
	    io.iov_base = bp->b_data;
	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
	    if (bp->b_flags & B_READ) {
		uiop->uio_rw = UIO_READ;
		nfsstats.read_physios++;
		error = nfs_readrpc(vp, uiop, cr);
	    } else {
		iomode = NFSV3WRITE_DATASYNC;
		uiop->uio_rw = UIO_WRITE;
		nfsstats.write_physios++;
		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
	    }
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else if (bp->b_flags & B_READ) {
	    io.iov_len = uiop->uio_resid = bp->b_bcount;
	    io.iov_base = bp->b_data;
	    uiop->uio_rw = UIO_READ;
	    switch (vp->v_type) {
	    case VREG:
		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
		nfsstats.read_bios++;
		error = nfs_readrpc(vp, uiop, cr);
		if (!error) {
		    bp->b_validoff = 0;
		    if (uiop->uio_resid) {
			/*
			 * If len > 0, there is a hole in the file and
			 * no writes after the hole have been pushed to
			 * the server yet.
			 * Just zero fill the rest of the valid area.
			 */
			diff = bp->b_bcount - uiop->uio_resid;
			len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE
				+ diff);
			if (len > 0) {
			    len = min(len, uiop->uio_resid);
			    bzero((char *)bp->b_data + diff, len);
			    bp->b_validend = diff + len;
			} else
			    bp->b_validend = diff;
		    } else
			bp->b_validend = bp->b_bcount;
		}
		/*
		 * Kill the process if it is executing this vnode as a
		 * text image and the file changed on the server (the
		 * in-core pages no longer match the backing file).
		 */
		if (p && (vp->v_flag & VTEXT) &&
			(((nmp->nm_flag & NFSMNT_NQNFS) &&
			  NQNFS_CKINVALID(vp, np, ND_READ) &&
			  np->n_lrev != np->n_brev) ||
			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
			  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
			uprintf("Process killed due to text file modification\n");
			psignal(p, SIGKILL);
			/*
			 * NOTE(review): bumps p_holdcnt after SIGKILL --
			 * presumably to keep the process from being
			 * swapped out before the signal is acted on;
			 * confirm against proc(9)/scheduler usage.
			 */
			p->p_holdcnt++;
		}
		break;
	    case VLNK:
		uiop->uio_offset = (off_t)0;
		nfsstats.readlink_bios++;
		error = nfs_readlinkrpc(vp, uiop, cr);
		break;
	    case VDIR:
		nfsstats.readdir_bios++;
		uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
			error = nfs_readdirplusrpc(vp, uiop, cr);
			/* Server lacks READDIRPLUS: fall back for good. */
			if (error == NFSERR_NOTSUPP)
				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
		}
		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
			error = nfs_readdirrpc(vp, uiop, cr);
		break;
	    default:
		printf("nfs_doio: type %x unexpected\n",vp->v_type);
		break;
	    };
	    if (error) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
	    }
	} else {
	    /* Write: push only the dirty region of the buffer. */
	    io.iov_len = uiop->uio_resid = bp->b_dirtyend
		- bp->b_dirtyoff;
	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
		+ bp->b_dirtyoff;
	    io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
	    uiop->uio_rw = UIO_WRITE;
	    nfsstats.write_bios++;
	    /* Async writes may go out unstable; all others filesync. */
	    if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
		iomode = NFSV3WRITE_UNSTABLE;
	    else
		iomode = NFSV3WRITE_FILESYNC;
	    bp->b_flags |= B_WRITEINPROG;
#ifdef fvdl_debug
	    printf("nfs_doio(%x): bp %x doff %d dend %d\n",
		vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
#endif
	    error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
	    if (!error && iomode == NFSV3WRITE_UNSTABLE)
		bp->b_flags |= B_NEEDCOMMIT;
	    else
		bp->b_flags &= ~B_NEEDCOMMIT;
	    bp->b_flags &= ~B_WRITEINPROG;

	    /*
	     * For an interrupted write, the buffer is still valid and the
	     * write hasn't been pushed to the server yet, so we can't set
	     * B_ERROR and report the interruption by setting B_EINTR. For
	     * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
	     * is essentially a noop.
	     * For the case of a V3 write rpc not being committed to stable
	     * storage, the block is still dirty and requires either a commit
	     * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
	     * before the block is reused. This is indicated by setting the
	     * B_DELWRI and B_NEEDCOMMIT flags.
	     */
	    if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
		bp->b_flags |= B_DELWRI;

		/*
		 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
		 * buffer to the clean list, we have to reassign it back to the
		 * dirty one. Ugh.
		 */
		if (bp->b_flags & B_ASYNC)
		    reassignbuf(bp, vp);
		else if (error)
		    bp->b_flags |= B_EINTR;
	    } else {
		if (error) {
		    bp->b_flags |= B_ERROR;
		    bp->b_error = np->n_error = error;
		    np->n_flag |= NWRITEERR;
		}
		bp->b_dirtyoff = bp->b_dirtyend = 0;
	    }
	}
	bp->b_resid = uiop->uio_resid;
	if (must_commit)
		nfs_clearcommit(vp->v_mount);
	biodone(bp);
	return (error);
}