1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * %sccs.include.redist.c% 9 * 10 * @(#)nfs_bio.c 7.35 (Berkeley) 11/05/92 11 */ 12 13 #include <sys/param.h> 14 #include <sys/systm.h> 15 #include <sys/resourcevar.h> 16 #include <sys/proc.h> 17 #include <sys/buf.h> 18 #include <sys/vnode.h> 19 #include <sys/trace.h> 20 #include <sys/mount.h> 21 #include <sys/kernel.h> 22 23 #include <vm/vm.h> 24 25 #include <nfs/nfsnode.h> 26 #include <nfs/rpcv2.h> 27 #include <nfs/nfsv2.h> 28 #include <nfs/nfs.h> 29 #include <nfs/nfsmount.h> 30 #include <nfs/nqnfs.h> 31 32 /* True and false, how exciting */ 33 #define TRUE 1 34 #define FALSE 0 35 36 /* 37 * Vnode op for read using bio 38 * Any similarity to readip() is purely coincidental 39 */ 40 nfs_bioread(vp, uio, ioflag, cred) 41 register struct vnode *vp; 42 register struct uio *uio; 43 int ioflag; 44 struct ucred *cred; 45 { 46 register struct nfsnode *np = VTONFS(vp); 47 register int biosize; 48 struct buf *bp; 49 struct vattr vattr; 50 struct nfsmount *nmp; 51 daddr_t lbn, bn, rablock[NFS_MAXRAHEAD]; 52 int rasize[NFS_MAXRAHEAD], nra, diff, error = 0; 53 int n, on; 54 55 #ifdef lint 56 ioflag = ioflag; 57 #endif /* lint */ 58 #ifdef DIAGNOSTIC 59 if (uio->uio_rw != UIO_READ) 60 panic("nfs_read mode"); 61 #endif 62 if (uio->uio_resid == 0) 63 return (0); 64 if (uio->uio_offset < 0 && vp->v_type != VDIR) 65 return (EINVAL); 66 nmp = VFSTONFS(vp->v_mount); 67 biosize = nmp->nm_rsize; 68 /* 69 * For nfs, cache consistency can only be maintained approximately. 70 * Although RFC1094 does not specify the criteria, the following is 71 * believed to be compatible with the reference port. 72 * For nqnfs, full cache consistency is maintained within the loop. 73 * For nfs: 74 * If the file's modify time on the server has changed since the 75 * last read rpc or you have written to the file, 76 * you may have lost data cache consistency with the 77 * server, so flush all of the file's data out of the cache. 78 * Then force a getattr rpc to ensure that you have up to date 79 * attributes. 80 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are 81 * the ones changing the modify time. 82 * NB: This implies that cache data can be read when up to 83 * NFS_ATTRTIMEO seconds out of date. If you find that you need current 84 * attributes this could be forced by setting n_attrstamp to 0 before 85 * the VOP_GETATTR() call. 86 */ 87 if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) { 88 if (np->n_flag & NMODIFIED) { 89 if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 || 90 vp->v_type != VREG) 91 NFS_VINVBUF(np, vp, TRUE, cred, uio->uio_procp); 92 np->n_attrstamp = 0; 93 np->n_direofoffset = 0; 94 if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 95 return (error); 96 np->n_mtime = vattr.va_mtime.ts_sec; 97 } else { 98 if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp)) 99 return (error); 100 if (np->n_mtime != vattr.va_mtime.ts_sec) { 101 np->n_direofoffset = 0; 102 NFS_VINVBUF(np, vp, TRUE, cred, uio->uio_procp); 103 np->n_mtime = vattr.va_mtime.ts_sec; 104 } 105 } 106 } 107 do { 108 109 /* 110 * Get a valid lease. If cached data is stale, flush it. 111 */ 112 if ((nmp->nm_flag & NFSMNT_NQNFS) && 113 NQNFS_CKINVALID(vp, np, NQL_READ)) { 114 do { 115 error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 116 } while (error == NQNFS_EXPIRED); 117 if (error) 118 return (error); 119 if (np->n_lrev != np->n_brev || 120 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 121 if (vp->v_type == VDIR) { 122 np->n_direofoffset = 0; 123 cache_purge(vp); 124 } 125 NFS_VINVBUF(np, vp, TRUE, cred, uio->uio_procp); 126 np->n_brev = np->n_lrev; 127 } 128 } 129 if (np->n_flag & NQNFSNONCACHE) { 130 switch (vp->v_type) { 131 case VREG: 132 error = nfs_readrpc(vp, uio, cred); 133 break; 134 case VLNK: 135 error = nfs_readlinkrpc(vp, uio, cred); 136 break; 137 case VDIR: 138 error = nfs_readdirrpc(vp, uio, cred); 139 break; 140 }; 141 return (error); 142 } 143 switch (vp->v_type) { 144 case VREG: 145 nfsstats.biocache_reads++; 146 lbn = uio->uio_offset / biosize; 147 on = uio->uio_offset & (biosize-1); 148 n = min((unsigned)(biosize - on), uio->uio_resid); 149 diff = np->n_size - uio->uio_offset; 150 if (diff <= 0) 151 return (error); 152 if (diff < n) 153 n = diff; 154 bn = lbn*(biosize/DEV_BSIZE); 155 for (nra = 0; nra < nmp->nm_readahead && 156 (lbn + 1 + nra) * biosize < np->n_size; nra++) { 157 rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE); 158 rasize[nra] = biosize; 159 } 160 again: 161 if (nra > 0 && lbn >= vp->v_lastr) 162 error = breadn(vp, bn, biosize, rablock, rasize, nra, 163 cred, &bp); 164 else 165 error = bread(vp, bn, biosize, cred, &bp); 166 if (bp->b_validend > 0) { 167 if (on < bp->b_validoff || (on+n) > bp->b_validend) { 168 bp->b_flags |= B_INVAL; 169 if (bp->b_dirtyend > 0) { 170 if ((bp->b_flags & B_DELWRI) == 0) 171 panic("nfsbioread"); 172 (void) bwrite(bp); 173 } else 174 brelse(bp); 175 goto again; 176 } 177 } else { 178 bp->b_validoff = 0; 179 bp->b_validend = biosize - bp->b_resid; 180 } 181 vp->v_lastr = lbn; 182 if (bp->b_resid) { 183 diff = (on >= (biosize-bp->b_resid)) ? 0 : 184 (biosize-bp->b_resid-on); 185 n = min(n, diff); 186 } 187 break; 188 case VLNK: 189 nfsstats.biocache_readlinks++; 190 on = 0; 191 error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp); 192 n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); 193 break; 194 case VDIR: 195 nfsstats.biocache_readdirs++; 196 on = 0; 197 error = bread(vp, (daddr_t)uio->uio_offset, NFS_DIRBLKSIZ, 198 cred, &bp); 199 n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid); 200 break; 201 }; 202 if (error) { 203 brelse(bp); 204 return (error); 205 } 206 207 /* 208 * For nqnfs: 209 * Must check for valid lease, since it may have expired while in 210 * bread(). If expired, get a lease. 211 * If data is stale, flush and try again. 212 * nb: If a read rpc is done by bread() or breada() and there is 213 * no valid lease, a get_lease request will be piggy backed. 214 */ 215 if (nmp->nm_flag & NFSMNT_NQNFS) { 216 if (NQNFS_CKINVALID(vp, np, NQL_READ)) { 217 do { 218 error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp); 219 } while (error == NQNFS_EXPIRED); 220 if (error) { 221 brelse(bp); 222 return (error); 223 } 224 if ((np->n_flag & NQNFSNONCACHE) || 225 np->n_lrev != np->n_brev || 226 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 227 if (vp->v_type == VDIR) { 228 np->n_direofoffset = 0; 229 cache_purge(vp); 230 } 231 brelse(bp); 232 NFS_VINVBUF(np, vp, TRUE, cred, uio->uio_procp); 233 np->n_brev = np->n_lrev; 234 continue; 235 } 236 } else if ((np->n_flag & NQNFSNONCACHE) || 237 ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { 238 np->n_direofoffset = 0; 239 brelse(bp); 240 NFS_VINVBUF(np, vp, TRUE, cred, uio->uio_procp); 241 np->n_brev = np->n_lrev; 242 continue; 243 } 244 } 245 if (n > 0) 246 error = uiomove(bp->b_un.b_addr + on, (int)n, uio); 247 switch (vp->v_type) { 248 case VREG: 249 if (n+on == biosize || uio->uio_offset == np->n_size) 250 bp->b_flags |= B_AGE; 251 break; 252 case VLNK: 253 n = 0; 254 break; 255 case VDIR: 256 uio->uio_offset = bp->b_blkno; 257 break; 258 }; 259 brelse(bp); 260 } while (error == 0 && uio->uio_resid > 0 && n != 0); 261 return (error); 262 } 263 264 /* 265 * Vnode op for write using bio 266 */ 267 nfs_write(ap) 268 struct vop_write_args /* { 269 struct vnode *a_vp; 270 struct uio *a_uio; 271 int a_ioflag; 272 struct ucred *a_cred; 273 } */ *ap; 274 { 275 register int biosize; 276 register struct uio *uio = ap->a_uio; 277 struct proc *p = uio->uio_procp; 278 register struct vnode *vp = ap->a_vp; 279 struct nfsnode *np = VTONFS(vp); 280 register struct ucred *cred = ap->a_cred; 281 int ioflag = ap->a_ioflag; 282 struct buf *bp; 283 struct vattr vattr; 284 struct nfsmount *nmp; 285 daddr_t lbn, bn; 286 int n, on, error = 0; 287 288 #ifdef DIAGNOSTIC 289 if (uio->uio_rw != UIO_WRITE) 290 panic("nfs_write mode"); 291 if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) 292 panic("nfs_write proc"); 293 #endif 294 if (vp->v_type != VREG) 295 return (EIO); 296 if (np->n_flag & NWRITEERR) { 297 np->n_flag &= ~NWRITEERR; 298 return (np->n_error); 299 } 300 if (ioflag & (IO_APPEND | IO_SYNC)) { 301 if (np->n_flag & NMODIFIED) { 302 np->n_attrstamp = 0; 303 NFS_VINVBUF(np, vp, TRUE, cred, p); 304 } 305 if (ioflag & IO_APPEND) { 306 np->n_attrstamp = 0; 307 if (error = VOP_GETATTR(vp, &vattr, cred, p)) 308 return (error); 309 uio->uio_offset = np->n_size; 310 } 311 } 312 nmp = VFSTONFS(vp->v_mount); 313 if (uio->uio_offset < 0) 314 return (EINVAL); 315 if (uio->uio_resid == 0) 316 return (0); 317 /* 318 * Maybe this should be above the vnode op call, but so long as 319 * file servers have no limits, i don't think it matters 320 */ 321 if (p && uio->uio_offset + uio->uio_resid > 322 p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 323 psignal(p, SIGXFSZ); 324 return (EFBIG); 325 } 326 /* 327 * I use nm_rsize, not nm_wsize so that all buffer cache blocks 328 * will be the same size within a filesystem. nfs_writerpc will 329 * still use nm_wsize when sizing the rpc's. 330 */ 331 biosize = nmp->nm_rsize; 332 np->n_flag |= NMODIFIED; 333 do { 334 335 /* 336 * Check for a valid write lease. 337 * If non-cachable, just do the rpc 338 */ 339 if ((nmp->nm_flag & NFSMNT_NQNFS) && 340 NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 341 do { 342 error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 343 } while (error == NQNFS_EXPIRED); 344 if (error) 345 return (error); 346 if (np->n_lrev != np->n_brev || 347 (np->n_flag & NQNFSNONCACHE)) { 348 NFS_VINVBUF(np, vp, TRUE, cred, p); 349 np->n_brev = np->n_lrev; 350 } 351 } 352 if (np->n_flag & NQNFSNONCACHE) 353 return (nfs_writerpc(vp, uio, cred, 0)); 354 nfsstats.biocache_writes++; 355 lbn = uio->uio_offset / biosize; 356 on = uio->uio_offset & (biosize-1); 357 n = min((unsigned)(biosize - on), uio->uio_resid); 358 if (uio->uio_offset + n > np->n_size) { 359 np->n_size = uio->uio_offset + n; 360 vnode_pager_setsize(vp, (u_long)np->n_size); 361 } 362 bn = lbn * (biosize / DEV_BSIZE); 363 again: 364 bp = getblk(vp, bn, biosize); 365 if (bp->b_wcred == NOCRED) { 366 crhold(cred); 367 bp->b_wcred = cred; 368 } 369 370 /* 371 * If the new write will leave a contiguous dirty 372 * area, just update the b_dirtyoff and b_dirtyend, 373 * otherwise force a write rpc of the old dirty area. 374 */ 375 if (bp->b_dirtyend > 0 && 376 (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 377 bp->b_proc = p; 378 if (error = bwrite(bp)) 379 return (error); 380 goto again; 381 } 382 383 /* 384 * Check for valid write lease and get one as required. 385 * In case getblk() and/or bwrite() delayed us. 386 */ 387 if ((nmp->nm_flag & NFSMNT_NQNFS) && 388 NQNFS_CKINVALID(vp, np, NQL_WRITE)) { 389 do { 390 error = nqnfs_getlease(vp, NQL_WRITE, cred, p); 391 } while (error == NQNFS_EXPIRED); 392 if (error) { 393 brelse(bp); 394 return (error); 395 } 396 if (np->n_lrev != np->n_brev || 397 (np->n_flag & NQNFSNONCACHE)) { 398 brelse(bp); 399 NFS_VINVBUF(np, vp, TRUE, cred, p); 400 np->n_brev = np->n_lrev; 401 goto again; 402 } 403 } 404 if (error = uiomove(bp->b_un.b_addr + on, n, uio)) { 405 brelse(bp); 406 return (error); 407 } 408 if (bp->b_dirtyend > 0) { 409 bp->b_dirtyoff = min(on, bp->b_dirtyoff); 410 bp->b_dirtyend = max((on+n), bp->b_dirtyend); 411 } else { 412 bp->b_dirtyoff = on; 413 bp->b_dirtyend = on+n; 414 } 415 if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || 416 bp->b_validoff > bp->b_dirtyend) { 417 bp->b_validoff = bp->b_dirtyoff; 418 bp->b_validend = bp->b_dirtyend; 419 } else { 420 bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); 421 bp->b_validend = max(bp->b_validend, bp->b_dirtyend); 422 } 423 424 /* 425 * If the lease is non-cachable or IO_SYNC do bwrite(). 426 */ 427 if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { 428 bp->b_proc = p; 429 bwrite(bp); 430 } else if ((n+on) == biosize && 431 (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 432 bp->b_flags |= B_AGE; 433 bp->b_proc = (struct proc *)0; 434 bawrite(bp); 435 } else { 436 bp->b_proc = (struct proc *)0; 437 bdwrite(bp); 438 } 439 } while (error == 0 && uio->uio_resid > 0 && n != 0); 440 return (error); 441 } 442