1 /* $NetBSD: ext2fs_balloc.c,v 1.8 2000/12/10 06:38:31 chs Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Manuel Bouyer. 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 37 * Modified for ext2fs by Manuel Bouyer. 38 */ 39 40 #if defined(_KERNEL) && !defined(_LKM) 41 #include "opt_uvmhist.h" 42 #endif 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/buf.h> 47 #include <sys/proc.h> 48 #include <sys/file.h> 49 #include <sys/vnode.h> 50 #include <sys/mount.h> 51 52 #include <uvm/uvm.h> 53 54 #include <ufs/ufs/quota.h> 55 #include <ufs/ufs/inode.h> 56 #include <ufs/ufs/ufs_extern.h> 57 58 #include <ufs/ext2fs/ext2fs.h> 59 #include <ufs/ext2fs/ext2fs_extern.h> 60 61 /* 62 * Balloc defines the structure of file system storage 63 * by allocating the physical blocks on a device given 64 * the inode and the logical block number in a file. 65 */ 66 int 67 ext2fs_balloc(ip, bn, size, cred, bpp, flags) 68 struct inode *ip; 69 ufs_daddr_t bn; 70 int size; 71 struct ucred *cred; 72 struct buf **bpp; 73 int flags; 74 { 75 struct m_ext2fs *fs; 76 ufs_daddr_t nb; 77 struct buf *bp, *nbp; 78 struct vnode *vp = ITOV(ip); 79 struct indir indirs[NIADDR + 2]; 80 ufs_daddr_t newb, lbn, *bap, pref; 81 int num, i, error; 82 u_int deallocated; 83 ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 84 int unwindidx = -1; 85 UVMHIST_FUNC("ext2fs_balloc"); UVMHIST_CALLED(ubchist); 86 87 UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); 88 89 if (bpp != NULL) { 90 *bpp = NULL; 91 } 92 if (bn < 0) 93 return (EFBIG); 94 fs = ip->i_e2fs; 95 lbn = bn; 96 97 /* 98 * The first NDADDR blocks are direct blocks 99 */ 100 if (bn < NDADDR) { 101 nb = fs2h32(ip->i_e2fs_blocks[bn]); 102 if (nb != 0) { 103 104 /* 105 * the block is already allocated, just read it. 106 */ 107 108 if (bpp != NULL) { 109 error = bread(vp, bn, fs->e2fs_bsize, NOCRED, 110 &bp); 111 if (error) { 112 brelse(bp); 113 return (error); 114 } 115 *bpp = bp; 116 } 117 return (0); 118 } 119 120 /* 121 * allocate a new direct block. 122 */ 123 124 error = ext2fs_alloc(ip, bn, 125 ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), 126 cred, &newb); 127 if (error) 128 return (error); 129 ip->i_e2fs_last_lblk = lbn; 130 ip->i_e2fs_last_blk = newb; 131 ip->i_e2fs_blocks[bn] = h2fs32(newb); 132 ip->i_flag |= IN_CHANGE | IN_UPDATE; 133 if (bpp != NULL) { 134 bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); 135 bp->b_blkno = fsbtodb(fs, newb); 136 if (flags & B_CLRBUF) 137 clrbuf(bp); 138 *bpp = bp; 139 } 140 return (0); 141 } 142 /* 143 * Determine the number of levels of indirection. 144 */ 145 pref = 0; 146 if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0) 147 return(error); 148 #ifdef DIAGNOSTIC 149 if (num < 1) 150 panic ("ext2fs_balloc: ufs_getlbns returned indirect block\n"); 151 #endif 152 /* 153 * Fetch the first indirect block allocating if necessary. 154 */ 155 --num; 156 nb = fs2h32(ip->i_e2fs_blocks[NDADDR + indirs[0].in_off]); 157 allocib = NULL; 158 allocblk = allociblk; 159 if (nb == 0) { 160 pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); 161 error = ext2fs_alloc(ip, lbn, pref, cred, &newb); 162 if (error) 163 return (error); 164 nb = newb; 165 *allocblk++ = nb; 166 ip->i_e2fs_last_blk = newb; 167 bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0); 168 bp->b_blkno = fsbtodb(fs, newb); 169 clrbuf(bp); 170 /* 171 * Write synchronously so that indirect blocks 172 * never point at garbage. 173 */ 174 if ((error = bwrite(bp)) != 0) 175 goto fail; 176 unwindidx = 0; 177 allocib = &ip->i_e2fs_blocks[NDADDR + indirs[0].in_off]; 178 *allocib = h2fs32(newb); 179 ip->i_flag |= IN_CHANGE | IN_UPDATE; 180 } 181 /* 182 * Fetch through the indirect blocks, allocating as necessary. 183 */ 184 for (i = 1;;) { 185 error = bread(vp, 186 indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp); 187 if (error) { 188 brelse(bp); 189 goto fail; 190 } 191 bap = (ufs_daddr_t *)bp->b_data; 192 nb = fs2h32(bap[indirs[i].in_off]); 193 if (i == num) 194 break; 195 i++; 196 if (nb != 0) { 197 brelse(bp); 198 continue; 199 } 200 pref = ext2fs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); 201 error = ext2fs_alloc(ip, lbn, pref, cred, &newb); 202 if (error) { 203 brelse(bp); 204 goto fail; 205 } 206 nb = newb; 207 *allocblk++ = nb; 208 ip->i_e2fs_last_blk = newb; 209 nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0); 210 nbp->b_blkno = fsbtodb(fs, nb); 211 clrbuf(nbp); 212 /* 213 * Write synchronously so that indirect blocks 214 * never point at garbage. 215 */ 216 if ((error = bwrite(nbp)) != 0) { 217 brelse(bp); 218 goto fail; 219 } 220 if (unwindidx < 0) 221 unwindidx = i - 1; 222 bap[indirs[i - 1].in_off] = h2fs32(nb); 223 /* 224 * If required, write synchronously, otherwise use 225 * delayed write. 226 */ 227 if (flags & B_SYNC) { 228 bwrite(bp); 229 } else { 230 bdwrite(bp); 231 } 232 } 233 /* 234 * Get the data block, allocating if necessary. 235 */ 236 if (nb == 0) { 237 pref = ext2fs_blkpref(ip, lbn, indirs[num].in_off, &bap[0]); 238 error = ext2fs_alloc(ip, lbn, pref, cred, &newb); 239 if (error) { 240 brelse(bp); 241 goto fail; 242 } 243 nb = newb; 244 *allocblk++ = nb; 245 ip->i_e2fs_last_lblk = lbn; 246 ip->i_e2fs_last_blk = newb; 247 bap[indirs[num].in_off] = h2fs32(nb); 248 /* 249 * If required, write synchronously, otherwise use 250 * delayed write. 251 */ 252 if (flags & B_SYNC) { 253 bwrite(bp); 254 } else { 255 bdwrite(bp); 256 } 257 if (bpp != NULL) { 258 nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); 259 nbp->b_blkno = fsbtodb(fs, nb); 260 if (flags & B_CLRBUF) 261 clrbuf(nbp); 262 *bpp = nbp; 263 } 264 return (0); 265 } 266 brelse(bp); 267 if (bpp != NULL) { 268 if (flags & B_CLRBUF) { 269 error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, 270 &nbp); 271 if (error) { 272 brelse(nbp); 273 goto fail; 274 } 275 } else { 276 nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); 277 nbp->b_blkno = fsbtodb(fs, nb); 278 } 279 *bpp = nbp; 280 } 281 return (0); 282 fail: 283 /* 284 * If we have failed part way through block allocation, we 285 * have to deallocate any indirect blocks that we have allocated. 286 */ 287 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { 288 ext2fs_blkfree(ip, *blkp); 289 deallocated += fs->e2fs_bsize; 290 } 291 if (unwindidx >= 0) { 292 if (unwindidx == 0) { 293 *allocib = 0; 294 } else { 295 int r; 296 297 r = bread(vp, indirs[unwindidx].in_lbn, 298 (int)fs->e2fs_bsize, NOCRED, &bp); 299 if (r) { 300 panic("Could not unwind indirect block, error %d", r); 301 brelse(bp); 302 } else { 303 bap = (ufs_daddr_t *)bp->b_data; 304 bap[indirs[unwindidx].in_off] = 0; 305 if (flags & B_SYNC) 306 bwrite(bp); 307 else 308 bdwrite(bp); 309 } 310 } 311 for (i = unwindidx + 1; i <= num; i++) { 312 bp = getblk(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, 313 0, 0); 314 bp->b_flags |= B_INVAL; 315 brelse(bp); 316 } 317 } 318 if (deallocated) { 319 ip->i_e2fs_nblock -= btodb(deallocated); 320 ip->i_e2fs_flags |= IN_CHANGE | IN_UPDATE; 321 } 322 return error; 323 } 324 325 int 326 ext2fs_ballocn(v) 327 void *v; 328 { 329 struct vop_ballocn_args /* { 330 struct vnode *a_vp; 331 off_t a_offset; 332 off_t a_length; 333 struct ucred *a_cred; 334 int a_flags; 335 } */ *ap = v; 336 off_t off, len; 337 struct vnode *vp = ap->a_vp; 338 struct inode *ip = VTOI(vp); 339 struct m_ext2fs *fs = ip->i_e2fs; 340 int error, delta, bshift, bsize; 341 UVMHIST_FUNC("ext2fs_ballocn"); UVMHIST_CALLED(ubchist); 342 343 bshift = fs->e2fs_bshift; 344 bsize = 1 << bshift; 345 346 off = ap->a_offset; 347 len = ap->a_length; 348 349 delta = off & (bsize - 1); 350 off -= delta; 351 len += delta; 352 353 while (len > 0) { 354 bsize = min(bsize, len); 355 UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x", 356 off, len, bsize, 0); 357 358 error = ext2fs_balloc(ip, lblkno(fs, off), bsize, ap->a_cred, 359 NULL, ap->a_flags); 360 if (error) { 361 UVMHIST_LOG(ubchist, "error %d", error, 0,0,0); 362 return error; 363 } 364 365 /* 366 * increase file size now, VOP_BALLOC() requires that 367 * EOF be up-to-date before each call. 368 */ 369 370 if (ip->i_e2fs_size < off + bsize) { 371 UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", 372 ip->i_e2fs_size, off + bsize,0,0); 373 ip->i_e2fs_size = off + bsize; 374 if (vp->v_uvm.u_size < ip->i_e2fs_size) { 375 uvm_vnp_setsize(vp, ip->i_e2fs_size); 376 } 377 } 378 379 off += bsize; 380 len -= bsize; 381 } 382 return 0; 383 } 384 385 /* 386 * allocate a range of blocks in a file. 387 * after this function returns, any page entirely contained within the range 388 * will map to invalid data and thus must be overwritten before it is made 389 * accessible to others. 390 */ 391 392 int 393 ext2fs_balloc_range(vp, off, len, cred, flags) 394 struct vnode *vp; 395 off_t off, len; 396 struct ucred *cred; 397 int flags; 398 { 399 off_t oldeof, eof, pagestart; 400 struct uvm_object *uobj; 401 int i, delta, error, npages; 402 int bshift = vp->v_mount->mnt_fs_bshift; 403 int bsize = 1 << bshift; 404 int ppb = max(bsize >> PAGE_SHIFT, 1); 405 struct vm_page *pgs[ppb]; 406 UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist); 407 UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", 408 vp, off, len, vp->v_uvm.u_size); 409 410 error = 0; 411 uobj = &vp->v_uvm.u_obj; 412 oldeof = vp->v_uvm.u_size; 413 eof = max(oldeof, off + len); 414 UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0); 415 pgs[0] = NULL; 416 417 /* 418 * cache the new range of the file. this will create zeroed pages 419 * where the new block will be and keep them locked until the 420 * new block is allocated, so there will be no window where 421 * the old contents of the new block is visible to racing threads. 422 */ 423 424 pagestart = trunc_page(off) & ~(bsize - 1); 425 npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT); 426 memset(pgs, 0, npages); 427 simple_lock(&uobj->vmobjlock); 428 error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, 429 VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF); 430 if (error) { 431 UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0); 432 goto errout; 433 } 434 for (i = 0; i < npages; i++) { 435 UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0); 436 KASSERT((pgs[i]->flags & PG_RELEASED) == 0); 437 pgs[i]->flags &= ~PG_CLEAN; 438 uvm_pageactivate(pgs[i]); 439 } 440 441 /* 442 * adjust off to be block-aligned. 443 */ 444 445 delta = off & (bsize - 1); 446 off -= delta; 447 len += delta; 448 449 /* 450 * now allocate the range. 451 */ 452 453 lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL); 454 error = VOP_BALLOCN(vp, off, len, cred, flags); 455 UVMHIST_LOG(ubchist, "ballocn %d", error,0,0,0); 456 lockmgr(&vp->v_glock, LK_RELEASE, NULL); 457 458 /* 459 * unbusy any pages we are holding. 460 */ 461 462 errout: 463 simple_lock(&uobj->vmobjlock); 464 if (error) { 465 (void) (uobj->pgops->pgo_flush)(uobj, oldeof, pagestart + ppb, 466 PGO_FREE); 467 } 468 if (pgs[0] != NULL) { 469 uvm_page_unbusy(pgs, npages); 470 } 471 simple_unlock(&uobj->vmobjlock); 472 return (error); 473 } 474