1 /* $NetBSD: ufs_readwrite.c,v 1.34 2001/09/16 13:57:56 chs Exp $ */ 2 3 /*- 4 * Copyright (c) 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 */

/*
 * This file is included (not compiled standalone) by both the FFS and
 * LFS code; LFS_READWRITE selects which set of names the shared READ
 * and WRITE implementations below are compiled under.
 */
#ifdef LFS_READWRITE
#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct lfs
#define	I_FS			i_lfs
#define	READ			lfs_read
#define	READ_S			"lfs_read"
#define	WRITE			lfs_write
#define	WRITE_S			"lfs_write"
#define	fs_bsize		lfs_bsize
#define	fs_maxfilesize	lfs_maxfilesize
#else
#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct fs
#define	I_FS			i_fs
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#endif

/*
 * Vnode op for reading.
 *
 * For FFS regular files the transfer goes through the unified buffer
 * cache (ubc_alloc/uiomove/ubc_release); everything else (LFS, VDIR,
 * VLNK) goes through the traditional buffer cache via bread/breadn.
 * Returns 0 or an errno; on success the access time is marked for
 * update (unless MNT_NOATIME), synchronously if IO_SYNC was requested.
 */
/* ARGSUSED */
int
READ(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	void *win;
	vsize_t bytelen;
	struct buf *bp;
	ufs_daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error;
	boolean_t usepc = FALSE;	/* use the page cache (UBC) path? */

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		/*
		 * Short symlinks are stored in the inode itself and are
		 * never read through here; seeing one is a kernel bug.
		 */
		if ((int)ip->i_ffs_size < vp->v_mount->mnt_maxsymlinklen ||
		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
		     ip->i_ffs_blocks == 0))
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset >= ip->i_ffs_size) {
		/* Reading at or past EOF: still update the access time. */
		goto out;
	}

#ifndef LFS_READWRITE
	/* Only FFS regular files go through the page cache. */
	usepc = vp->v_type == VREG;
#endif
	if (usepc) {
		/*
		 * Page-cache path: map a window over the file with
		 * ubc_alloc and copy directly out of it.  bytelen is
		 * clamped to EOF; ubc_alloc may shrink it further to
		 * the window it actually mapped.
		 */
		while (uio->uio_resid > 0) {
			bytelen = MIN(ip->i_ffs_size - uio->uio_offset,
			    uio->uio_resid);
			if (bytelen == 0)
				break;

			win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
			    &bytelen, UBC_READ);
			error = uiomove(win, bytelen, uio);
			ubc_release(win, 0);
			if (error)
				break;
		}
		goto out;
	}

	/*
	 * Buffer-cache path: read a block at a time, issuing read-ahead
	 * for the next block (breadn) unless this is the last block of
	 * the file.  bp is reset to NULL each iteration so the cleanup
	 * below only releases a buffer left held by an early exit.
	 */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = ip->i_ffs_size - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);
		/* Transfer at most: rest of block, request, and file. */
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);

		if (lblktosize(fs, nextlbn) >= ip->i_ffs_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	/* Release the buffer still held after an error/short-read break. */
	if (bp != NULL)
		brelse(bp);

 out:
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
		/* IO_SYNC read: push the atime update to disk now. */
		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
			error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	}
	return (error);
}

/*
 * Vnode op for writing.
 */
int
WRITE(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct genfs_node *gp;
	FS *fs;
	struct buf *bp;
	struct proc *p;
	struct ucred *cred;
	ufs_daddr_t lbn;
	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;
	int bsize, aflag;
	int ubc_alloc_flags;
	void *win;
	vsize_t bytelen;
	boolean_t alloced;
	boolean_t usepc = FALSE;	/* use the page cache (UBC) path? */

	cred = ap->a_cred;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);
	gp = VTOG(vp);

	KASSERT(vp->v_size == ip->i_ffs_size);
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", WRITE_S);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_ffs_size;
		/* An append-only file may only be written at EOF. */
		if ((ip->i_ffs_flags & APPEND) && uio->uio_offset != ip->i_ffs_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		if ((ioflag & IO_SYNC) == 0)
			panic("%s: nonsync dir write", WRITE_S);
		break;
	default:
		panic("%s: type", WRITE_S);
	}

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
#ifdef LFS_READWRITE
	/* Disallow writes to the Ifile, even if noschg flag is removed */
	/* XXX can this go away when the Ifile is no longer in the namespace? */
	if (vp == fs->lfs_ivnode)
		return (EPERM);
#endif

	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	p = uio->uio_procp;
	if (vp->v_type == VREG && p &&
	    uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		/* Exceeding RLIMIT_FSIZE: signal and fail, per POSIX. */
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	/* Remember starting state so errors can be rolled back below. */
	resid = uio->uio_resid;
	osize = ip->i_ffs_size;
	bsize = fs->fs_bsize;
	error = 0;

#ifndef LFS_READWRITE
	/* Only FFS regular files go through the page cache. */
	usepc = vp->v_type == VREG;
#endif
	if (!usepc) {
		goto bcache;
	}

	/*
	 * [preallocoff, endallocoff) is the page-aligned middle region
	 * of this write where blocks can be allocated without first
	 * initializing pages (the data will be overwritten anyway).
	 */
	preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
	endallocoff = nsize - blkoff(fs, nsize);

	/*
	 * if we're increasing the file size, deal with expanding
	 * the fragment if there is one.
	 */

	if (nsize > osize && lblkno(fs, osize) < NDADDR &&
	    lblkno(fs, osize) != lblkno(fs, nsize) &&
	    blkroundup(fs, osize) != osize) {
		error = ufs_balloc_range(vp, osize, blkroundup(fs, osize) -
		    osize, cred, aflag);
		if (error) {
			goto out;
		}
	}

	alloced = FALSE;
	ubc_alloc_flags = UBC_WRITE;
	origoff = uio->uio_offset;
	while (uio->uio_resid > 0) {
		oldoff = uio->uio_offset;
		blkoffset = blkoff(fs, uio->uio_offset);
		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);

		/*
		 * if we're filling in a hole, allocate the blocks now and
		 * initialize the pages first.  if we're extending the file,
		 * we can safely allocate blocks without initializing pages
		 * since the new blocks will be inaccessible until the write
		 * is complete.
		 */

		if (uio->uio_offset < preallocoff ||
		    uio->uio_offset >= endallocoff) {
			error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
			    cred, aflag);
			if (error) {
				break;
			}
			ubc_alloc_flags &= ~UBC_FAULTBUSY;
		} else if (!alloced) {
			/*
			 * Allocate the whole middle region in one shot,
			 * under the genfs glock.  On failure, truncate
			 * back so no partially-allocated blocks remain.
			 */
			lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
			error = GOP_ALLOC(vp, uio->uio_offset, uio->uio_resid,
			    aflag, cred);
			lockmgr(&gp->g_glock, LK_RELEASE, NULL);
			if (error) {
				(void) VOP_TRUNCATE(vp, preallocoff,
				    ioflag & IO_SYNC, ap->a_cred,
				    uio->uio_procp);
				break;
			}
			alloced = TRUE;
			/* Pages need not be initialized; fault them busy. */
			ubc_alloc_flags |= UBC_FAULTBUSY;
		}

		/*
		 * copy the data.
		 */

		win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
		    ubc_alloc_flags);
		error = uiomove(win, bytelen, uio);
		ubc_release(win, 0);
		if (error) {
			break;
		}

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 */

		if (vp->v_size < uio->uio_offset) {
			uvm_vnp_setsize(vp, uio->uio_offset);
		}

		/*
		 * flush what we just wrote if necessary.
		 * XXXUBC simplistic async flushing.
		 */

		if (oldoff >> 16 != uio->uio_offset >> 16) {
			/* Crossed a 64KB boundary: clean that window. */
			simple_lock(&vp->v_uobj.vmobjlock);
			error = (vp->v_uobj.pgops->pgo_put)(&vp->v_uobj,
			    (oldoff >> 16) << 16, (uio->uio_offset >> 16) << 16,
			    PGO_CLEANIT);
			if (error) {
				break;
			}
		}
	}
	if (error == 0 && ioflag & IO_SYNC) {
		/* Synchronous write: push the whole written range now. */
		simple_lock(&vp->v_uobj.vmobjlock);
		error = (vp->v_uobj.pgops->pgo_put)(&vp->v_uobj,
		    origoff & ~(bsize - 1), blkroundup(fs, uio->uio_offset),
		    PGO_CLEANIT|PGO_SYNCIO);
	}
	goto out;

 bcache:
	/* Traditional buffer-cache write path (LFS, directories, links). */
	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	while (uio->uio_resid > 0) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		/*
		 * A partial-block write must read/zero the rest of the
		 * buffer (B_CLRBUF); a full-block write need not.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);

		if (error)
			break;
		if (uio->uio_offset + xfersize > ip->i_ffs_size) {
			/* Extend the file; keep UVM's size in sync. */
			ip->i_ffs_size = uio->uio_offset + xfersize;
			uvm_vnp_setsize(vp, ip->i_ffs_size);
		}
		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
		if (xfersize > size)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * if we didn't clear the block and the uiomove failed,
		 * the buf will now contain part of some other file,
		 * so we need to invalidate it.
		 */
		if (error && (flags & B_CLRBUF) == 0) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			break;
		}
#ifdef LFS_READWRITE
		/*
		 * Reserve segment space for the worst-case number of
		 * indirect blocks before the write, release it after.
		 */
		if (!error)
			error = lfs_reserve(fs, vp, btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
		(void)VOP_BWRITE(bp);
		if (!error)
			lfs_reserve(fs, vp, -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
#else
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize)
			/* Filled the block: start the write immediately. */
			bawrite(bp);
		else
			/* Partial block: delay, more data may follow. */
			bdwrite(bp);
#endif
		if (error || xfersize == 0)
			break;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
 out:
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		ip->i_ffs_mode &= ~(ISUID | ISGID);
	if (error) {
		/* Roll the file and the uio back to the pre-write state. */
		(void) VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
		    uio->uio_procp);
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
		/* Synchronous write: flush the inode update too. */
		error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	KASSERT(vp->v_size == ip->i_ffs_size);
	return (error);
}