1 /* $NetBSD: ulfs_readwrite.c,v 1.27 2020/04/23 21:47:09 ad Exp $ */ 2 /* from NetBSD: ufs_readwrite.c,v 1.120 2015/04/12 22:48:38 riastradh Exp */ 3 4 /*- 5 * Copyright (c) 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 33 */ 34 35 #include <sys/cdefs.h> 36 __KERNEL_RCSID(1, "$NetBSD: ulfs_readwrite.c,v 1.27 2020/04/23 21:47:09 ad Exp $"); 37 38 #define FS struct lfs 39 #define I_FS i_lfs 40 #define READ lfs_read 41 #define READ_S "lfs_read" 42 #define WRITE lfs_write 43 #define WRITE_S "lfs_write" 44 #define BUFRD lfs_bufrd 45 #define BUFWR lfs_bufwr 46 #define fs_sb_getbsize(fs) lfs_sb_getbsize(fs) 47 #define fs_bmask lfs_bmask 48 49 static int ulfs_post_read_update(struct vnode *, int, int); 50 static int ulfs_post_write_update(struct vnode *, struct uio *, int, 51 kauth_cred_t, off_t, int, int, int); 52 53 /* 54 * Vnode op for reading. 55 */ 56 /* ARGSUSED */ 57 int 58 READ(void *v) 59 { 60 struct vop_read_args /* { 61 struct vnode *a_vp; 62 struct uio *a_uio; 63 int a_ioflag; 64 kauth_cred_t a_cred; 65 } */ *ap = v; 66 struct vnode *vp; 67 struct inode *ip; 68 struct uio *uio; 69 FS *fs; 70 vsize_t bytelen; 71 int error, ioflag, advice; 72 73 vp = ap->a_vp; 74 ip = VTOI(vp); 75 fs = ip->I_FS; 76 uio = ap->a_uio; 77 ioflag = ap->a_ioflag; 78 error = 0; 79 80 KASSERT(uio->uio_rw == UIO_READ); 81 KASSERT(vp->v_type == VREG || vp->v_type == VDIR); 82 83 /* XXX Eliminate me by refusing directory reads from userland. */ 84 if (vp->v_type == VDIR) 85 return BUFRD(vp, uio, ioflag, ap->a_cred); 86 /* XXX Eliminate me by using ufs_bufio in lfs. */ 87 if (vp->v_type == VREG && ip->i_number == LFS_IFILE_INUM) 88 return BUFRD(vp, uio, ioflag, ap->a_cred); 89 if ((u_int64_t)uio->uio_offset > fs->um_maxfilesize) 90 return (EFBIG); 91 if (uio->uio_resid == 0) 92 return (0); 93 94 95 if (uio->uio_offset >= ip->i_size) 96 goto out; 97 98 KASSERT(vp->v_type == VREG); 99 advice = IO_ADV_DECODE(ap->a_ioflag); 100 while (uio->uio_resid > 0) { 101 if (ioflag & IO_DIRECT) { 102 genfs_directio(vp, uio, ioflag); 103 } 104 bytelen = MIN(ip->i_size - uio->uio_offset, uio->uio_resid); 105 if (bytelen == 0) 106 break; 107 error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice, 108 UBC_READ | UBC_PARTIALOK | UBC_VNODE_FLAGS(vp)); 109 if (error) 110 break; 111 } 112 113 out: 114 error = ulfs_post_read_update(vp, ap->a_ioflag, error); 115 return (error); 116 } 117 118 /* 119 * UFS op for reading via the buffer cache 120 */ 121 int 122 BUFRD(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) 123 { 124 struct inode *ip; 125 FS *fs; 126 struct buf *bp; 127 daddr_t lbn, nextlbn; 128 off_t bytesinfile; 129 long size, xfersize, blkoffset; 130 int error; 131 132 KASSERT(VOP_ISLOCKED(vp)); 133 KASSERT(vp->v_type == VDIR || vp->v_type == VLNK || 134 vp->v_type == VREG); 135 KASSERT(uio->uio_rw == UIO_READ); 136 137 ip = VTOI(vp); 138 fs = ip->I_FS; 139 error = 0; 140 141 KASSERT(vp->v_type != VLNK || ip->i_size >= fs->um_maxsymlinklen); 142 KASSERT(vp->v_type != VLNK || fs->um_maxsymlinklen != 0 || 143 DIP(ip, blocks) == 0); 144 KASSERT(vp->v_type != VREG || vp == fs->lfs_ivnode); 145 KASSERT(vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM); 146 147 if (uio->uio_offset > fs->um_maxfilesize) 148 return EFBIG; 149 if (uio->uio_resid == 0) 150 return 0; 151 152 153 if (uio->uio_offset >= ip->i_size) 154 goto out; 155 156 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { 157 bytesinfile = ip->i_size - uio->uio_offset; 158 if (bytesinfile <= 0) 159 break; 160 lbn = lfs_lblkno(fs, uio->uio_offset); 161 nextlbn = lbn + 1; 162 size = lfs_blksize(fs, ip, lbn); 163 blkoffset = lfs_blkoff(fs, uio->uio_offset); 164 xfersize = MIN(MIN(fs_sb_getbsize(fs) - blkoffset, uio->uio_resid), 165 bytesinfile); 166 167 if (lfs_lblktosize(fs, nextlbn) >= ip->i_size) 168 error = bread(vp, lbn, size, 0, &bp); 169 else { 170 int nextsize = lfs_blksize(fs, ip, nextlbn); 171 error = breadn(vp, lbn, 172 size, &nextlbn, &nextsize, 1, 0, &bp); 173 } 174 if (error) 175 break; 176 177 /* 178 * We should only get non-zero b_resid when an I/O error 179 * has occurred, which should cause us to break above. 180 * However, if the short read did not cause an error, 181 * then we want to ensure that we do not uiomove bad 182 * or uninitialized data. 183 */ 184 size -= bp->b_resid; 185 if (size < xfersize) { 186 if (size == 0) 187 break; 188 xfersize = size; 189 } 190 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); 191 if (error) 192 break; 193 brelse(bp, 0); 194 } 195 if (bp != NULL) 196 brelse(bp, 0); 197 198 out: 199 error = ulfs_post_read_update(vp, ioflag, error); 200 return (error); 201 } 202 203 static int 204 ulfs_post_read_update(struct vnode *vp, int ioflag, int oerror) 205 { 206 struct inode *ip = VTOI(vp); 207 int error = oerror; 208 209 if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { 210 ip->i_state |= IN_ACCESS; 211 if ((ioflag & IO_SYNC) == IO_SYNC) { 212 error = lfs_update(vp, NULL, NULL, UPDATE_WAIT); 213 } 214 } 215 216 /* Read error overrides any inode update error. */ 217 if (oerror) 218 error = oerror; 219 return error; 220 } 221 222 /* 223 * Vnode op for writing. 224 */ 225 int 226 WRITE(void *v) 227 { 228 struct vop_write_args /* { 229 struct vnode *a_vp; 230 struct uio *a_uio; 231 int a_ioflag; 232 kauth_cred_t a_cred; 233 } */ *ap = v; 234 struct vnode *vp; 235 struct uio *uio; 236 struct inode *ip; 237 FS *fs; 238 kauth_cred_t cred; 239 off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize; 240 int blkoffset, error, flags, ioflag, resid; 241 int aflag; 242 int extended=0; 243 vsize_t bytelen; 244 bool async; 245 246 cred = ap->a_cred; 247 ioflag = ap->a_ioflag; 248 uio = ap->a_uio; 249 vp = ap->a_vp; 250 ip = VTOI(vp); 251 252 KASSERT(vp->v_size == ip->i_size); 253 KASSERT(uio->uio_rw == UIO_WRITE); 254 KASSERT(vp->v_type == VREG); 255 256 if (ioflag & IO_APPEND) 257 uio->uio_offset = ip->i_size; 258 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 259 return (EPERM); 260 261 fs = ip->I_FS; 262 if (uio->uio_offset < 0 || 263 (u_int64_t)uio->uio_offset + uio->uio_resid > fs->um_maxfilesize) 264 return (EFBIG); 265 /* Disallow writes to the Ifile, even if noschg flag is removed */ 266 /* XXX can this go away when the Ifile is no longer in the namespace? */ 267 if (vp == fs->lfs_ivnode) 268 return (EPERM); 269 if (uio->uio_resid == 0) 270 return (0); 271 272 flags = ioflag & IO_SYNC ? B_SYNC : 0; 273 async = vp->v_mount->mnt_flag & MNT_ASYNC; 274 origoff = uio->uio_offset; 275 resid = uio->uio_resid; 276 osize = ip->i_size; 277 error = 0; 278 279 KASSERT(vp->v_type == VREG); 280 281 async = true; 282 lfs_availwait(fs, lfs_btofsb(fs, uio->uio_resid)); 283 lfs_check(vp, LFS_UNUSED_LBN, 0); 284 285 preallocoff = round_page(lfs_blkroundup(fs, MAX(osize, uio->uio_offset))); 286 aflag = ioflag & IO_SYNC ? B_SYNC : 0; 287 nsize = MAX(osize, uio->uio_offset + uio->uio_resid); 288 endallocoff = nsize - lfs_blkoff(fs, nsize); 289 290 /* 291 * if we're increasing the file size, deal with expanding 292 * the fragment if there is one. 293 */ 294 295 if (nsize > osize && lfs_lblkno(fs, osize) < ULFS_NDADDR && 296 lfs_lblkno(fs, osize) != lfs_lblkno(fs, nsize) && 297 lfs_blkroundup(fs, osize) != osize) { 298 off_t eob; 299 300 eob = lfs_blkroundup(fs, osize); 301 uvm_vnp_setwritesize(vp, eob); 302 error = ulfs_balloc_range(vp, osize, eob - osize, cred, aflag); 303 if (error) 304 goto out; 305 if (flags & B_SYNC) { 306 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 307 VOP_PUTPAGES(vp, trunc_page(osize & lfs_sb_getbmask(fs)), 308 round_page(eob), 309 PGO_CLEANIT | PGO_SYNCIO); 310 } 311 } 312 313 while (uio->uio_resid > 0) { 314 int ubc_flags = UBC_WRITE; 315 bool overwrite; /* if we're overwrite a whole block */ 316 off_t newoff; 317 318 if (ioflag & IO_DIRECT) { 319 genfs_directio(vp, uio, ioflag); 320 } 321 322 oldoff = uio->uio_offset; 323 blkoffset = lfs_blkoff(fs, uio->uio_offset); 324 bytelen = MIN(fs_sb_getbsize(fs) - blkoffset, uio->uio_resid); 325 if (bytelen == 0) { 326 break; 327 } 328 329 /* 330 * if we're filling in a hole, allocate the blocks now and 331 * initialize the pages first. if we're extending the file, 332 * we can safely allocate blocks without initializing pages 333 * since the new blocks will be inaccessible until the write 334 * is complete. 335 */ 336 overwrite = uio->uio_offset >= preallocoff && 337 uio->uio_offset < endallocoff; 338 if (!overwrite && (vp->v_vflag & VV_MAPPED) == 0 && 339 lfs_blkoff(fs, uio->uio_offset) == 0 && 340 (uio->uio_offset & PAGE_MASK) == 0) { 341 vsize_t len; 342 343 len = trunc_page(bytelen); 344 len -= lfs_blkoff(fs, len); 345 if (len > 0) { 346 overwrite = true; 347 bytelen = len; 348 } 349 } 350 351 newoff = oldoff + bytelen; 352 if (vp->v_size < newoff) { 353 uvm_vnp_setwritesize(vp, newoff); 354 } 355 356 if (!overwrite) { 357 error = ulfs_balloc_range(vp, uio->uio_offset, bytelen, 358 cred, aflag); 359 if (error) 360 break; 361 } else { 362 genfs_node_wrlock(vp); 363 error = GOP_ALLOC(vp, uio->uio_offset, bytelen, 364 aflag, cred); 365 genfs_node_unlock(vp); 366 if (error) 367 break; 368 ubc_flags |= UBC_FAULTBUSY; 369 } 370 371 /* 372 * copy the data. 373 */ 374 375 error = ubc_uiomove(&vp->v_uobj, uio, bytelen, 376 IO_ADV_DECODE(ioflag), ubc_flags | UBC_VNODE_FLAGS(vp)); 377 378 /* 379 * update UVM's notion of the size now that we've 380 * copied the data into the vnode's pages. 381 * 382 * we should update the size even when uiomove failed. 383 */ 384 385 if (vp->v_size < newoff) { 386 uvm_vnp_setsize(vp, newoff); 387 extended = 1; 388 } 389 390 if (error) 391 break; 392 393 /* 394 * flush what we just wrote if necessary. 395 * XXXUBC simplistic async flushing. 396 */ 397 398 __USE(async); 399 } 400 if (error == 0 && ioflag & IO_SYNC) { 401 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 402 error = VOP_PUTPAGES(vp, trunc_page(origoff & lfs_sb_getbmask(fs)), 403 round_page(lfs_blkroundup(fs, uio->uio_offset)), 404 PGO_CLEANIT | PGO_SYNCIO); 405 } 406 407 out: 408 error = ulfs_post_write_update(vp, uio, ioflag, cred, osize, resid, 409 extended, error); 410 411 return (error); 412 } 413 414 /* 415 * UFS op for writing via the buffer cache 416 */ 417 int 418 BUFWR(struct vnode *vp, struct uio *uio, int ioflag, kauth_cred_t cred) 419 { 420 struct inode *ip; 421 FS *fs; 422 int flags; 423 struct buf *bp; 424 off_t osize; 425 int resid, xfersize, size, blkoffset; 426 daddr_t lbn; 427 int extended=0; 428 int error; 429 bool need_unreserve = false; 430 431 KASSERT(ISSET(ioflag, IO_NODELOCKED)); 432 KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); 433 KASSERT(vp->v_type == VDIR || vp->v_type == VLNK); 434 KASSERT(vp->v_type != VDIR || ISSET(ioflag, IO_SYNC)); 435 KASSERT(uio->uio_rw == UIO_WRITE); 436 437 ip = VTOI(vp); 438 fs = ip->I_FS; 439 440 KASSERT(vp->v_size == ip->i_size); 441 442 if (uio->uio_offset < 0 || 443 uio->uio_resid > fs->um_maxfilesize || 444 uio->uio_offset > (fs->um_maxfilesize - uio->uio_resid)) 445 return EFBIG; 446 KASSERT(vp != fs->lfs_ivnode); 447 if (uio->uio_resid == 0) 448 return 0; 449 450 flags = ioflag & IO_SYNC ? B_SYNC : 0; 451 resid = uio->uio_resid; 452 osize = ip->i_size; 453 error = 0; 454 455 KASSERT(vp->v_type != VREG); 456 457 lfs_availwait(fs, lfs_btofsb(fs, uio->uio_resid)); 458 lfs_check(vp, LFS_UNUSED_LBN, 0); 459 460 /* XXX Should never have pages cached here. */ 461 KASSERT(vp->v_uobj.uo_npages == 0); 462 while (uio->uio_resid > 0) { 463 lbn = lfs_lblkno(fs, uio->uio_offset); 464 blkoffset = lfs_blkoff(fs, uio->uio_offset); 465 xfersize = MIN(fs_sb_getbsize(fs) - blkoffset, uio->uio_resid); 466 if (fs_sb_getbsize(fs) > xfersize) 467 flags |= B_CLRBUF; 468 else 469 flags &= ~B_CLRBUF; 470 471 error = lfs_reserve(fs, vp, NULL, 472 lfs_btofsb(fs, (ULFS_NIADDR + 1) << lfs_sb_getbshift(fs))); 473 if (error) 474 break; 475 need_unreserve = true; 476 error = lfs_balloc(vp, uio->uio_offset, xfersize, cred, flags, 477 &bp); 478 479 if (error) 480 break; 481 if (uio->uio_offset + xfersize > ip->i_size) { 482 ip->i_size = uio->uio_offset + xfersize; 483 DIP_ASSIGN(ip, size, ip->i_size); 484 uvm_vnp_setsize(vp, ip->i_size); 485 extended = 1; 486 } 487 size = lfs_blksize(fs, ip, lbn) - bp->b_resid; 488 if (xfersize > size) 489 xfersize = size; 490 491 error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); 492 493 /* 494 * if we didn't clear the block and the uiomove failed, 495 * the buf will now contain part of some other file, 496 * so we need to invalidate it. 497 */ 498 if (error && (flags & B_CLRBUF) == 0) { 499 brelse(bp, BC_INVAL); 500 break; 501 } 502 (void)VOP_BWRITE(bp->b_vp, bp); 503 lfs_reserve(fs, vp, NULL, 504 -lfs_btofsb(fs, (ULFS_NIADDR + 1) << lfs_sb_getbshift(fs))); 505 need_unreserve = false; 506 if (error || xfersize == 0) 507 break; 508 } 509 if (need_unreserve) { 510 lfs_reserve(fs, vp, NULL, 511 -lfs_btofsb(fs, (ULFS_NIADDR + 1) << lfs_sb_getbshift(fs))); 512 } 513 514 error = ulfs_post_write_update(vp, uio, ioflag, cred, osize, resid, 515 extended, error); 516 517 return (error); 518 } 519 520 static int 521 ulfs_post_write_update(struct vnode *vp, struct uio *uio, int ioflag, 522 kauth_cred_t cred, off_t osize, int resid, int extended, int oerror) 523 { 524 struct inode *ip = VTOI(vp); 525 int error = oerror; 526 527 /* Trigger ctime and mtime updates, and atime if MNT_RELATIME. */ 528 ip->i_state |= IN_CHANGE | IN_UPDATE; 529 if (vp->v_mount->mnt_flag & MNT_RELATIME) 530 ip->i_state |= IN_ACCESS; 531 532 /* 533 * If we successfully wrote any data and we are not the superuser, 534 * we clear the setuid and setgid bits as a precaution against 535 * tampering. 536 */ 537 if (resid > uio->uio_resid && cred) { 538 if (ip->i_mode & ISUID) { 539 if (kauth_authorize_vnode(cred, 540 KAUTH_VNODE_RETAIN_SUID, vp, NULL, EPERM) != 0) { 541 ip->i_mode &= ~ISUID; 542 DIP_ASSIGN(ip, mode, ip->i_mode); 543 } 544 } 545 546 if (ip->i_mode & ISGID) { 547 if (kauth_authorize_vnode(cred, 548 KAUTH_VNODE_RETAIN_SGID, vp, NULL, EPERM) != 0) { 549 ip->i_mode &= ~ISGID; 550 DIP_ASSIGN(ip, mode, ip->i_mode); 551 } 552 } 553 } 554 555 /* If we successfully wrote anything, notify kevent listeners. */ 556 if (resid > uio->uio_resid) 557 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0)); 558 559 /* 560 * Update the size on disk: truncate back to original size on 561 * error, or reflect the new size on success. 562 */ 563 if (error) { 564 (void) lfs_truncate(vp, osize, ioflag & IO_SYNC, cred); 565 uio->uio_offset -= resid - uio->uio_resid; 566 uio->uio_resid = resid; 567 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) { 568 error = lfs_update(vp, NULL, NULL, UPDATE_WAIT); 569 } else { 570 /* nothing */ 571 } 572 573 /* Make sure the vnode uvm size matches the inode file size. */ 574 KASSERT(vp->v_size == ip->i_size); 575 576 /* Write error overrides any inode update error. */ 577 if (oerror) 578 error = oerror; 579 return error; 580 } 581