1 /* $NetBSD: ulfs_readwrite.c,v 1.7 2013/10/17 21:01:08 christos Exp $ */ 2 /* from NetBSD: ufs_readwrite.c,v 1.105 2013/01/22 09:39:18 dholland Exp */ 3 4 /*- 5 * Copyright (c) 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(1, "$NetBSD: ulfs_readwrite.c,v 1.7 2013/10/17 21:01:08 christos Exp $"); 37 38 #ifdef LFS_READWRITE 39 #define FS struct lfs 40 #define I_FS i_lfs 41 #define READ lfs_read 42 #define READ_S "lfs_read" 43 #define WRITE lfs_write 44 #define WRITE_S "lfs_write" 45 #define fs_bsize lfs_bsize 46 #define fs_bmask lfs_bmask 47 #else 48 #define FS struct fs 49 #define I_FS i_fs 50 #define READ ffs_read 51 #define READ_S "ffs_read" 52 #define WRITE ffs_write 53 #define WRITE_S "ffs_write" 54 #endif 55 56 /* 57 * Vnode op for reading. 58 */ 59 /* ARGSUSED */ 60 int 61 READ(void *v) 62 { 63 struct vop_read_args /* { 64 struct vnode *a_vp; 65 struct uio *a_uio; 66 int a_ioflag; 67 kauth_cred_t a_cred; 68 } */ *ap = v; 69 struct vnode *vp; 70 struct inode *ip; 71 struct uio *uio; 72 struct buf *bp; 73 FS *fs; 74 vsize_t bytelen; 75 daddr_t lbn, nextlbn; 76 off_t bytesinfile; 77 long size, xfersize, blkoffset; 78 int error, ioflag; 79 bool usepc = false; 80 81 vp = ap->a_vp; 82 ip = VTOI(vp); 83 fs = ip->I_FS; 84 uio = ap->a_uio; 85 ioflag = ap->a_ioflag; 86 error = 0; 87 88 #ifdef DIAGNOSTIC 89 if (uio->uio_rw != UIO_READ) 90 panic("%s: mode", READ_S); 91 92 if (vp->v_type == VLNK) { 93 if (ip->i_size < fs->um_maxsymlinklen || 94 (fs->um_maxsymlinklen == 0 && DIP(ip, blocks) == 0)) 95 panic("%s: short symlink", READ_S); 96 } else if (vp->v_type != VREG && vp->v_type != VDIR) 97 panic("%s: type %d", READ_S, vp->v_type); 98 #endif 99 if ((u_int64_t)uio->uio_offset > fs->um_maxfilesize) 100 return (EFBIG); 101 if (uio->uio_resid == 0) 102 return (0); 103 104 #ifndef LFS_READWRITE 105 if ((ip->i_flags & (SF_SNAPSHOT | SF_SNAPINVAL)) == SF_SNAPSHOT) 106 return ffs_snapshot_read(vp, uio, ioflag); 107 #endif /* !LFS_READWRITE */ 108 109 fstrans_start(vp->v_mount, FSTRANS_SHARED); 110 111 if (uio->uio_offset >= ip->i_size) 112 goto out; 113 114 #ifdef LFS_READWRITE 115 usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM); 116 #else /* !LFS_READWRITE */ 117 usepc = vp->v_type == VREG; 118 #endif /* !LFS_READWRITE */ 119 if (usepc) { 120 const int advice = IO_ADV_DECODE(ap->a_ioflag); 121 122 while (uio->uio_resid > 0) { 123 if (ioflag & IO_DIRECT) { 124 genfs_directio(vp, uio, ioflag); 125 } 126 bytelen = MIN(ip->i_size - uio->uio_offset, 127 uio->uio_resid); 128 if (bytelen == 0) 129 break; 130 error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice, 131 UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp)); 132 if (error) 133 break; 134 } 135 goto out; 136 } 137 138 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 139 bytesinfile = ip->i_size - uio->uio_offset; 140 if (bytesinfile <= 0) 141 break; 142 lbn = lfs_lblkno(fs, uio->uio_offset); 143 nextlbn = lbn + 1; 144 size = lfs_blksize(fs, ip, lbn); 145 blkoffset = lfs_blkoff(fs, uio->uio_offset); 146 xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid), 147 bytesinfile); 148 149 if (lfs_lblktosize(fs, nextlbn) >= ip->i_size) 150 error = bread(vp, lbn, size, NOCRED, 0, &bp); 151 else { 152 int nextsize = lfs_blksize(fs, ip, nextlbn); 153 error = breadn(vp, lbn, 154 size, &nextlbn, &nextsize, 1, NOCRED, 0, &bp); 155 } 156 if (error) 157 break; 158 159 /* 160 * We should only get non-zero b_resid when an I/O error 161 * has occurred, which should cause us to break above. 162 * However, if the short read did not cause an error, 163 * then we want to ensure that we do not uiomove bad 164 * or uninitialized data. 165 */ 166 size -= bp->b_resid; 167 if (size < xfersize) { 168 if (size == 0) 169 break; 170 xfersize = size; 171 } 172 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); 173 if (error) 174 break; 175 brelse(bp, 0); 176 } 177 if (bp != NULL) 178 brelse(bp, 0); 179 180 out: 181 if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { 182 ip->i_flag |= IN_ACCESS; 183 if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) { 184 error = lfs_update(vp, NULL, NULL, UPDATE_WAIT); 185 } 186 } 187 188 fstrans_done(vp->v_mount); 189 return (error); 190 } 191 192 /* 193 * Vnode op for writing. 194 */ 195 int 196 WRITE(void *v) 197 { 198 struct vop_write_args /* { 199 struct vnode *a_vp; 200 struct uio *a_uio; 201 int a_ioflag; 202 kauth_cred_t a_cred; 203 } */ *ap = v; 204 struct vnode *vp; 205 struct uio *uio; 206 struct inode *ip; 207 FS *fs; 208 struct buf *bp; 209 kauth_cred_t cred; 210 daddr_t lbn; 211 off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize; 212 int blkoffset, error, flags, ioflag, resid, size, xfersize; 213 int aflag; 214 int extended=0; 215 vsize_t bytelen; 216 bool async; 217 bool usepc = false; 218 #ifdef LFS_READWRITE 219 bool need_unreserve = false; 220 #endif 221 222 cred = ap->a_cred; 223 ioflag = ap->a_ioflag; 224 uio = ap->a_uio; 225 vp = ap->a_vp; 226 ip = VTOI(vp); 227 228 KASSERT(vp->v_size == ip->i_size); 229 #ifdef DIAGNOSTIC 230 if (uio->uio_rw != UIO_WRITE) 231 panic("%s: mode", WRITE_S); 232 #endif 233 234 switch (vp->v_type) { 235 case VREG: 236 if (ioflag & IO_APPEND) 237 uio->uio_offset = ip->i_size; 238 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 239 return (EPERM); 240 /* FALLTHROUGH */ 241 case VLNK: 242 break; 243 case VDIR: 244 if ((ioflag & IO_SYNC) == 0) 245 panic("%s: nonsync dir write", WRITE_S); 246 break; 247 default: 248 panic("%s: type", WRITE_S); 249 } 250 251 fs = ip->I_FS; 252 if (uio->uio_offset < 0 || 253 (u_int64_t)uio->uio_offset + uio->uio_resid > fs->um_maxfilesize) 254 return (EFBIG); 255 #ifdef LFS_READWRITE 256 /* Disallow writes to the Ifile, even if noschg flag is removed */ 257 /* XXX can this go away when the Ifile is no longer in the namespace? */ 258 if (vp == fs->lfs_ivnode) 259 return (EPERM); 260 #endif 261 if (uio->uio_resid == 0) 262 return (0); 263 264 fstrans_start(vp->v_mount, FSTRANS_SHARED); 265 266 flags = ioflag & IO_SYNC ? B_SYNC : 0; 267 async = vp->v_mount->mnt_flag & MNT_ASYNC; 268 origoff = uio->uio_offset; 269 resid = uio->uio_resid; 270 osize = ip->i_size; 271 error = 0; 272 273 usepc = vp->v_type == VREG; 274 275 #ifdef LFS_READWRITE 276 async = true; 277 lfs_availwait(fs, lfs_btofsb(fs, uio->uio_resid)); 278 lfs_check(vp, LFS_UNUSED_LBN, 0); 279 #endif /* !LFS_READWRITE */ 280 if (!usepc) 281 goto bcache; 282 283 preallocoff = round_page(lfs_blkroundup(fs, MAX(osize, uio->uio_offset))); 284 aflag = ioflag & IO_SYNC ? B_SYNC : 0; 285 nsize = MAX(osize, uio->uio_offset + uio->uio_resid); 286 endallocoff = nsize - lfs_blkoff(fs, nsize); 287 288 /* 289 * if we're increasing the file size, deal with expanding 290 * the fragment if there is one. 291 */ 292 293 if (nsize > osize && lfs_lblkno(fs, osize) < ULFS_NDADDR && 294 lfs_lblkno(fs, osize) != lfs_lblkno(fs, nsize) && 295 lfs_blkroundup(fs, osize) != osize) { 296 off_t eob; 297 298 eob = lfs_blkroundup(fs, osize); 299 uvm_vnp_setwritesize(vp, eob); 300 error = ulfs_balloc_range(vp, osize, eob - osize, cred, aflag); 301 if (error) 302 goto out; 303 if (flags & B_SYNC) { 304 mutex_enter(vp->v_interlock); 305 VOP_PUTPAGES(vp, trunc_page(osize & fs->fs_bmask), 306 round_page(eob), 307 PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); 308 } 309 } 310 311 while (uio->uio_resid > 0) { 312 int ubc_flags = UBC_WRITE; 313 bool overwrite; /* if we're overwrite a whole block */ 314 off_t newoff; 315 316 if (ioflag & IO_DIRECT) { 317 genfs_directio(vp, uio, ioflag | IO_JOURNALLOCKED); 318 } 319 320 oldoff = uio->uio_offset; 321 blkoffset = lfs_blkoff(fs, uio->uio_offset); 322 bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); 323 if (bytelen == 0) { 324 break; 325 } 326 327 /* 328 * if we're filling in a hole, allocate the blocks now and 329 * initialize the pages first. if we're extending the file, 330 * we can safely allocate blocks without initializing pages 331 * since the new blocks will be inaccessible until the write 332 * is complete. 333 */ 334 overwrite = uio->uio_offset >= preallocoff && 335 uio->uio_offset < endallocoff; 336 if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 && 337 lfs_blkoff(fs, uio->uio_offset) == 0 && 338 (uio->uio_offset & PAGE_MASK) == 0) { 339 vsize_t len; 340 341 len = trunc_page(bytelen); 342 len -= lfs_blkoff(fs, len); 343 if (len > 0) { 344 overwrite = true; 345 bytelen = len; 346 } 347 } 348 349 newoff = oldoff + bytelen; 350 if (vp->v_size < newoff) { 351 uvm_vnp_setwritesize(vp, newoff); 352 } 353 354 if (!overwrite) { 355 error = ulfs_balloc_range(vp, uio->uio_offset, bytelen, 356 cred, aflag); 357 if (error) 358 break; 359 } else { 360 genfs_node_wrlock(vp); 361 error = GOP_ALLOC(vp, uio->uio_offset, bytelen, 362 aflag, cred); 363 genfs_node_unlock(vp); 364 if (error) 365 break; 366 ubc_flags |= UBC_FAULTBUSY; 367 } 368 369 /* 370 * copy the data. 371 */ 372 373 error = ubc_uiomove(&vp->v_uobj, uio, bytelen, 374 IO_ADV_DECODE(ioflag), ubc_flags | UBC_UNMAP_FLAG(vp)); 375 376 /* 377 * update UVM's notion of the size now that we've 378 * copied the data into the vnode's pages. 379 * 380 * we should update the size even when uiomove failed. 381 */ 382 383 if (vp->v_size < newoff) { 384 uvm_vnp_setsize(vp, newoff); 385 extended = 1; 386 } 387 388 if (error) 389 break; 390 391 /* 392 * flush what we just wrote if necessary. 393 * XXXUBC simplistic async flushing. 394 */ 395 396 #ifndef LFS_READWRITE 397 if (!async && oldoff >> 16 != uio->uio_offset >> 16) { 398 mutex_enter(vp->v_interlock); 399 error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16, 400 (uio->uio_offset >> 16) << 16, 401 PGO_CLEANIT | PGO_JOURNALLOCKED | PGO_LAZY); 402 if (error) 403 break; 404 } 405 #else 406 __USE(async); 407 #endif 408 } 409 if (error == 0 && ioflag & IO_SYNC) { 410 mutex_enter(vp->v_interlock); 411 error = VOP_PUTPAGES(vp, trunc_page(origoff & fs->fs_bmask), 412 round_page(lfs_blkroundup(fs, uio->uio_offset)), 413 PGO_CLEANIT | PGO_SYNCIO | PGO_JOURNALLOCKED); 414 } 415 goto out; 416 417 bcache: 418 mutex_enter(vp->v_interlock); 419 VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid), 420 PGO_CLEANIT | PGO_FREE | PGO_SYNCIO | PGO_JOURNALLOCKED); 421 while (uio->uio_resid > 0) { 422 lbn = lfs_lblkno(fs, uio->uio_offset); 423 blkoffset = lfs_blkoff(fs, uio->uio_offset); 424 xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid); 425 if (fs->fs_bsize > xfersize) 426 flags |= B_CLRBUF; 427 else 428 flags &= ~B_CLRBUF; 429 430 #ifdef LFS_READWRITE 431 error = lfs_reserve(fs, vp, NULL, 432 lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift)); 433 if (error) 434 break; 435 need_unreserve = true; 436 #endif 437 error = lfs_balloc(vp, uio->uio_offset, xfersize, 438 ap->a_cred, flags, &bp); 439 440 if (error) 441 break; 442 if (uio->uio_offset + xfersize > ip->i_size) { 443 ip->i_size = uio->uio_offset + xfersize; 444 DIP_ASSIGN(ip, size, ip->i_size); 445 uvm_vnp_setsize(vp, ip->i_size); 446 extended = 1; 447 } 448 size = lfs_blksize(fs, ip, lbn) - bp->b_resid; 449 if (xfersize > size) 450 xfersize = size; 451 452 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); 453 454 /* 455 * if we didn't clear the block and the uiomove failed, 456 * the buf will now contain part of some other file, 457 * so we need to invalidate it. 458 */ 459 if (error && (flags & B_CLRBUF) == 0) { 460 brelse(bp, BC_INVAL); 461 break; 462 } 463 #ifdef LFS_READWRITE 464 (void)VOP_BWRITE(bp->b_vp, bp); 465 lfs_reserve(fs, vp, NULL, 466 -lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift)); 467 need_unreserve = false; 468 #else 469 if (ioflag & IO_SYNC) 470 (void)bwrite(bp); 471 else if (xfersize + blkoffset == fs->fs_bsize) 472 bawrite(bp); 473 else 474 bdwrite(bp); 475 #endif 476 if (error || xfersize == 0) 477 break; 478 } 479 #ifdef LFS_READWRITE 480 if (need_unreserve) { 481 lfs_reserve(fs, vp, NULL, 482 -lfs_btofsb(fs, (ULFS_NIADDR + 1) << fs->lfs_bshift)); 483 } 484 #endif 485 486 /* 487 * If we successfully wrote any data, and we are not the superuser 488 * we clear the setuid and setgid bits as a precaution against 489 * tampering. 490 */ 491 out: 492 ip->i_flag |= IN_CHANGE | IN_UPDATE; 493 if (vp->v_mount->mnt_flag & MNT_RELATIME) 494 ip->i_flag |= IN_ACCESS; 495 if (resid > uio->uio_resid && ap->a_cred) { 496 if (ip->i_mode & ISUID) { 497 if (kauth_authorize_vnode(ap->a_cred, 498 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) { 499 ip->i_mode &= ~ISUID; 500 DIP_ASSIGN(ip, mode, ip->i_mode); 501 } 502 } 503 504 if (ip->i_mode & ISGID) { 505 if (kauth_authorize_vnode(ap->a_cred, 506 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) { 507 ip->i_mode &= ~ISGID; 508 DIP_ASSIGN(ip, mode, ip->i_mode); 509 } 510 } 511 } 512 if (resid > uio->uio_resid) 513 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); 514 if (error) { 515 (void) lfs_truncate(vp, osize, ioflag & IO_SYNC, ap->a_cred); 516 uio->uio_offset -= resid - uio->uio_resid; 517 uio->uio_resid = resid; 518 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) { 519 error = lfs_update(vp, NULL, NULL, UPDATE_WAIT); 520 } else { 521 /* nothing */ 522 } 523 KASSERT(vp->v_size == ip->i_size); 524 fstrans_done(vp->v_mount); 525 526 return (error); 527 } 528