/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Rick Macklem at The University of Guelph.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)nfs_bio.c	7.33 (Berkeley) 10/11/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/trace.h>
#include <sys/mount.h>
#include <sys/kernel.h>

#include <vm/vm.h>

#include <nfs/nfsnode.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsv2.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nqnfs.h>

/* True and false, how exciting */
#define	TRUE	1
#define	FALSE	0

/*
 * Vnode op for read using bio
 * Any similarity to readip() is purely coincidental
 */
nfs_bioread(vp, uio, ioflag, cred)
	register struct vnode *vp;
	register struct uio *uio;
	int ioflag;
	struct ucred *cred;
{
	register struct nfsnode *np = VTONFS(vp);
	register int biosize;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn, rablock[NFS_MAXRAHEAD];
	int rasize[NFS_MAXRAHEAD], nra, diff, error = 0;
	int n, on;

#ifdef lint
	ioflag = ioflag;
#endif /* lint */
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("nfs_read mode");
#endif
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0 && vp->v_type != VDIR)
		return (EINVAL);
	nmp = VFSTONFS(vp->v_mount);
	biosize = nmp->nm_rsize;
	/*
	 * For nfs, cache consistency can only be maintained approximately.
	 * Although RFC1094 does not specify the criteria, the following is
	 * believed to be compatible with the reference port.
	 * For nqnfs, full cache consistency is maintained within the loop.
	 * For nfs:
	 * If the file's modify time on the server has changed since the
	 * last read rpc or you have written to the file,
	 * you may have lost data cache consistency with the
	 * server, so flush all of the file's data out of the cache.
	 * Then force a getattr rpc to ensure that you have up to date
	 * attributes.
	 * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
	 * the ones changing the modify time."
	 * NB: This implies that cache data can be read when up to
	 * NFS_ATTRTIMEO seconds out of date. If you find that you need
	 * current attributes, this could be forced by setting n_attrstamp
	 * to 0 before the VOP_GETATTR() call.
	 */
	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
		if (np->n_flag & NMODIFIED) {
			np->n_flag &= ~NMODIFIED;
			if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
			    vp->v_type != VREG)
				vinvalbuf(vp, TRUE, cred, uio->uio_procp);
			np->n_attrstamp = 0;
			np->n_direofoffset = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
				return (error);
			np->n_mtime = vattr.va_mtime.ts_sec;
		} else {
			if (error = VOP_GETATTR(vp, &vattr, cred, uio->uio_procp))
				return (error);
			if (np->n_mtime != vattr.va_mtime.ts_sec) {
				np->n_direofoffset = 0;
				vinvalbuf(vp, TRUE, cred, uio->uio_procp);
				np->n_mtime = vattr.va_mtime.ts_sec;
			}
		}
	}
	do {

		/*
		 * Get a valid lease. If cached data is stale, flush it.
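		 * (A lease here is the nqnfs cache-consistency token; the
		 * loop below simply retries nqnfs_getlease() for as long as
		 * the server reports NQNFS_EXPIRED.)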
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_READ)) {
			do {
				error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
				if (vp->v_type == VDIR) {
					np->n_direofoffset = 0;
					cache_purge(vp);
				}
				np->n_flag &= ~NMODIFIED;
				vinvalbuf(vp, TRUE, cred, uio->uio_procp);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE) {
			switch (vp->v_type) {
			case VREG:
				error = nfs_readrpc(vp, uio, cred);
				break;
			case VLNK:
				error = nfs_readlinkrpc(vp, uio, cred);
				break;
			case VDIR:
				error = nfs_readdirrpc(vp, uio, cred);
				break;
			};
			return (error);
		}
		switch (vp->v_type) {
		case VREG:
			nfsstats.biocache_reads++;
			lbn = uio->uio_offset / biosize;
			on = uio->uio_offset & (biosize-1);
			n = min((unsigned)(biosize - on), uio->uio_resid);
			diff = np->n_size - uio->uio_offset;
			if (diff <= 0)
				return (error);
			if (diff < n)
				n = diff;
			bn = lbn*(biosize/DEV_BSIZE);
			for (nra = 0; nra < nmp->nm_readahead &&
			    (lbn + 1 + nra) * biosize < np->n_size; nra++) {
				rablock[nra] = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
				rasize[nra] = biosize;
			}
		again:
			if (nra > 0 && lbn >= vp->v_lastr)
				error = breadn(vp, bn, biosize, rablock, rasize, nra,
				    cred, &bp);
			else
				error = bread(vp, bn, biosize, cred, &bp);
			if (bp->b_validend > 0) {
				if (on < bp->b_validoff || (on+n) > bp->b_validend) {
					bp->b_flags |= B_INVAL;
					if (bp->b_dirtyend > 0) {
						if ((bp->b_flags & B_DELWRI) == 0)
							panic("nfsbioread");
						(void) bwrite(bp);
					} else
						brelse(bp);
					goto again;
				}
			} else {
				bp->b_validoff = 0;
				bp->b_validend = biosize - bp->b_resid;
			}
			vp->v_lastr = lbn;
			if (bp->b_resid) {
				diff = (on >= (biosize-bp->b_resid)) ? 0 :
				    (biosize-bp->b_resid-on);
				n = min(n, diff);
			}
			break;
		case VLNK:
			nfsstats.biocache_readlinks++;
			on = 0;
			error = bread(vp, (daddr_t)0, NFS_MAXPATHLEN, cred, &bp);
			n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
			break;
		case VDIR:
			nfsstats.biocache_readdirs++;
			on = 0;
			error = bread(vp, uio->uio_offset, NFS_DIRBLKSIZ, cred, &bp);
			n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
			break;
		};
		if (error) {
			brelse(bp);
			return (error);
		}

		/*
		 * For nqnfs:
		 * Must check for valid lease, since it may have expired while in
		 * bread(). If expired, get a lease.
		 * If data is stale, flush and try again.
		 * nb: If a read rpc is done by bread() or breada() and there is
		 * no valid lease, a get_lease request will be piggy backed.
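		 * nb: n_brev is the modify revision recorded when the data was
		 * cached and n_lrev the one returned with the latest lease, so
		 * a mismatch means the cached buffers predate the lease.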
		 */
		if (nmp->nm_flag & NFSMNT_NQNFS) {
			if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
				do {
					error = nqnfs_getlease(vp, NQL_READ, cred, uio->uio_procp);
				} while (error == NQNFS_EXPIRED);
				if (error) {
					brelse(bp);
					return (error);
				}
				if ((np->n_flag & NQNFSNONCACHE) ||
				    np->n_lrev != np->n_brev ||
				    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
					if (vp->v_type == VDIR) {
						np->n_direofoffset = 0;
						cache_purge(vp);
					}
					brelse(bp);
					np->n_flag &= ~NMODIFIED;
					vinvalbuf(vp, TRUE, cred, uio->uio_procp);
					np->n_brev = np->n_lrev;
					continue;
				}
			} else if ((np->n_flag & NQNFSNONCACHE) ||
			    ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
				np->n_direofoffset = 0;
				brelse(bp);
				np->n_flag &= ~NMODIFIED;
				vinvalbuf(vp, TRUE, cred, uio->uio_procp);
				np->n_brev = np->n_lrev;
				continue;
			}
		}
		if (n > 0)
			error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
		switch (vp->v_type) {
		case VREG:
			if (n+on == biosize || uio->uio_offset == np->n_size)
				bp->b_flags |= B_AGE;
			break;
		case VLNK:
			n = 0;
			break;
		case VDIR:
			uio->uio_offset = bp->b_blkno;
			break;
		};
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	return (error);
}

/*
 * Vnode op for write using bio
 */
nfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register int biosize;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	register struct vnode *vp = ap->a_vp;
	struct nfsnode *np = VTONFS(vp);
	register struct ucred *cred = ap->a_cred;
	int ioflag = ap->a_ioflag;
	struct buf *bp;
	struct vattr vattr;
	struct nfsmount *nmp;
	daddr_t lbn, bn;
	int n, on, error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("nfs_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("nfs_write proc");
#endif
	if (vp->v_type != VREG)
		return (EIO);
	if (np->n_flag & NWRITEERR) {
		np->n_flag &= ~NWRITEERR;
		return (np->n_error);
	}
	if (ioflag & (IO_APPEND | IO_SYNC)) {
		if (np->n_flag & NMODIFIED) {
			np->n_flag &= ~NMODIFIED;
			np->n_attrstamp = 0;
			vinvalbuf(vp, TRUE, cred, p);
		}
		if (ioflag & IO_APPEND) {
			np->n_attrstamp = 0;
			if (error = VOP_GETATTR(vp, &vattr, cred, p))
				return (error);
			uio->uio_offset = np->n_size;
		}
	}
	nmp = VFSTONFS(vp->v_mount);
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);
	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters
	 */
	if (p && uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	/*
	 * I use nm_rsize, not nm_wsize so that all buffer cache blocks
	 * will be the same size within a filesystem. nfs_writerpc will
	 * still use nm_wsize when sizing the rpc's.
	 */
	biosize = nmp->nm_rsize;
	np->n_flag |= NMODIFIED;
	do {

		/*
		 * Check for a valid write lease.
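		 * If the lease revision has changed, toss the cached buffers.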
		 * If non-cachable, just do the rpc
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error)
				return (error);
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				vinvalbuf(vp, TRUE, cred, p);
				np->n_brev = np->n_lrev;
			}
		}
		if (np->n_flag & NQNFSNONCACHE)
			return (nfs_writerpc(vp, uio, cred, 0));
		nfsstats.biocache_writes++;
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize-1);
		n = min((unsigned)(biosize - on), uio->uio_resid);
		if (uio->uio_offset + n > np->n_size) {
			np->n_size = uio->uio_offset + n;
			vnode_pager_setsize(vp, (u_long)np->n_size);
		}
		bn = lbn * (biosize / DEV_BSIZE);
	again:
		bp = getblk(vp, bn, biosize);
		if (bp->b_wcred == NOCRED) {
			crhold(cred);
			bp->b_wcred = cred;
		}

		/*
		 * If the new write will leave a contiguous dirty
		 * area, just update the b_dirtyoff and b_dirtyend,
		 * otherwise force a write rpc of the old dirty area.
		 */
		if (bp->b_dirtyend > 0 &&
		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
			bp->b_proc = p;
			if (error = bwrite(bp))
				return (error);
			goto again;
		}

		/*
		 * Check for valid write lease and get one as required.
		 * In case getblk() and/or bwrite() delayed us.
		 */
		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
		    NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
			do {
				error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
			} while (error == NQNFS_EXPIRED);
			if (error) {
				brelse(bp);
				return (error);
			}
			if (np->n_lrev != np->n_brev ||
			    (np->n_flag & NQNFSNONCACHE)) {
				brelse(bp);
				vinvalbuf(vp, TRUE, cred, p);
				np->n_brev = np->n_lrev;
				goto again;
			}
		}
		if (error = uiomove(bp->b_un.b_addr + on, n, uio)) {
			brelse(bp);
			return (error);
		}
		if (bp->b_dirtyend > 0) {
			bp->b_dirtyoff = min(on, bp->b_dirtyoff);
			bp->b_dirtyend = max((on+n), bp->b_dirtyend);
		} else {
			bp->b_dirtyoff = on;
			bp->b_dirtyend = on+n;
		}
		if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
		    bp->b_validoff > bp->b_dirtyend) {
			bp->b_validoff = bp->b_dirtyoff;
			bp->b_validend = bp->b_dirtyend;
		} else {
			bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
			bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
		}

		/*
		 * If the lease is non-cachable or IO_SYNC do bwrite().
		 */
		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
			bp->b_proc = p;
			bwrite(bp);
		} else if ((n+on) == biosize &&
		    (nmp->nm_flag & NFSMNT_NQNFS) == 0) {
			bp->b_flags |= B_AGE;
			bp->b_proc = (struct proc *)0;
			bawrite(bp);
		} else {
			bp->b_proc = (struct proc *)0;
			bdwrite(bp);
		}
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	return (error);
}